From e69f61862ab833e9b8d3c15b6ce07fd69f3bfecc Mon Sep 17 00:00:00 2001
From: Yacine Belkadi <yacine.belkadi.1@gmail.com>
Date: Fri, 12 Jul 2013 20:45:47 +0200
Subject: sched: Fix some kernel-doc warnings

When building the htmldocs (in verbose mode), scripts/kernel-doc
reports the follwing type of warnings:

  Warning(kernel/sched/core.c:936): No description found for return value of 'task_curr'
  ...

Fix those by:

 - adding the missing descriptions
 - using "Return" sections for the descriptions

Signed-off-by: Yacine Belkadi <yacine.belkadi.1@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1373654747-2389-1-git-send-email-yacine.belkadi.1@gmail.com
[ While at it, fix the cpupri_set() explanation. ]
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 50d04b92ceda..82300247974c 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1532,6 +1532,8 @@ static inline pid_t task_pgrp_nr(struct task_struct *tsk)
  * Test if a process is not yet dead (at most zombie state)
  * If pid_alive fails, then pointers within the task structure
  * can be stale and must not be dereferenced.
+ *
+ * Return: 1 if the process is alive. 0 otherwise.
  */
 static inline int pid_alive(struct task_struct *p)
 {
@@ -1543,6 +1545,8 @@ static inline int pid_alive(struct task_struct *p)
  * @tsk: Task structure to be checked.
  *
  * Check if a task structure is the first user space task the kernel created.
+ *
+ * Return: 1 if the task structure is init. 0 otherwise.
  */
 static inline int is_global_init(struct task_struct *tsk)
 {
@@ -1893,6 +1897,8 @@ extern struct task_struct *idle_task(int cpu);
 /**
  * is_idle_task - is the specified task an idle task?
  * @p: the task in question.
+ *
+ * Return: 1 if @p is an idle task. 0 otherwise.
  */
 static inline bool is_idle_task(const struct task_struct *p)
 {
-- 
cgit v1.2.3


From a1a8e1dc111d6f05e7164e851e58219d428359e1 Mon Sep 17 00:00:00 2001
From: Lars-Peter Clausen <lars@metafoo.de>
Date: Tue, 16 Jul 2013 15:28:00 +0100
Subject: iio:trigger: Fix use_count race condition

When using more than one trigger consumer it can happen that multiple threads
perform a read-modify-update cycle on 'use_count' concurrently. This can cause
updates to be lost and use_count can get stuck at non-zero value, in which case
the IIO core assumes that at least one thread is still running and will wait for
it to finish before running any trigger handlers again. This effectively renders
the trigger disabled and a reboot is necessary before it can be used again. To
fix this make use_count an atomic variable. Also set it to the number of
consumers before starting the first consumer, otherwise it might happen that
use_count drops to 0 even though not all consumers have been run yet.

Signed-off-by: Lars-Peter Clausen <lars@metafoo.de>
Tested-by: Denis Ciocca <denis.ciocca@st.com>
Signed-off-by: Jonathan Cameron <jic23@kernel.org>
---
 drivers/iio/industrialio-trigger.c | 34 ++++++++++++++++++++++------------
 include/linux/iio/trigger.h        |  3 ++-
 2 files changed, 24 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/iio/industrialio-trigger.c b/drivers/iio/industrialio-trigger.c
index ea8a4146620d..0dd9bb873130 100644
--- a/drivers/iio/industrialio-trigger.c
+++ b/drivers/iio/industrialio-trigger.c
@@ -127,12 +127,17 @@ static struct iio_trigger *iio_trigger_find_by_name(const char *name,
 void iio_trigger_poll(struct iio_trigger *trig, s64 time)
 {
 	int i;
-	if (!trig->use_count)
-		for (i = 0; i < CONFIG_IIO_CONSUMERS_PER_TRIGGER; i++)
-			if (trig->subirqs[i].enabled) {
-				trig->use_count++;
+
+	if (!atomic_read(&trig->use_count)) {
+		atomic_set(&trig->use_count, CONFIG_IIO_CONSUMERS_PER_TRIGGER);
+
+		for (i = 0; i < CONFIG_IIO_CONSUMERS_PER_TRIGGER; i++) {
+			if (trig->subirqs[i].enabled)
 				generic_handle_irq(trig->subirq_base + i);
-			}
+			else
+				iio_trigger_notify_done(trig);
+		}
+	}
 }
 EXPORT_SYMBOL(iio_trigger_poll);
 
@@ -146,19 +151,24 @@ EXPORT_SYMBOL(iio_trigger_generic_data_rdy_poll);
 void iio_trigger_poll_chained(struct iio_trigger *trig, s64 time)
 {
 	int i;
-	if (!trig->use_count)
-		for (i = 0; i < CONFIG_IIO_CONSUMERS_PER_TRIGGER; i++)
-			if (trig->subirqs[i].enabled) {
-				trig->use_count++;
+
+	if (!atomic_read(&trig->use_count)) {
+		atomic_set(&trig->use_count, CONFIG_IIO_CONSUMERS_PER_TRIGGER);
+
+		for (i = 0; i < CONFIG_IIO_CONSUMERS_PER_TRIGGER; i++) {
+			if (trig->subirqs[i].enabled)
 				handle_nested_irq(trig->subirq_base + i);
-			}
+			else
+				iio_trigger_notify_done(trig);
+		}
+	}
 }
 EXPORT_SYMBOL(iio_trigger_poll_chained);
 
 void iio_trigger_notify_done(struct iio_trigger *trig)
 {
-	trig->use_count--;
-	if (trig->use_count == 0 && trig->ops && trig->ops->try_reenable)
+	if (atomic_dec_and_test(&trig->use_count) && trig->ops &&
+		trig->ops->try_reenable)
 		if (trig->ops->try_reenable(trig))
 			/* Missed an interrupt so launch new poll now */
 			iio_trigger_poll(trig, 0);
diff --git a/include/linux/iio/trigger.h b/include/linux/iio/trigger.h
index 3869c525b052..369cf2cd5144 100644
--- a/include/linux/iio/trigger.h
+++ b/include/linux/iio/trigger.h
@@ -8,6 +8,7 @@
  */
 #include <linux/irq.h>
 #include <linux/module.h>
+#include <linux/atomic.h>
 
 #ifndef _IIO_TRIGGER_H_
 #define _IIO_TRIGGER_H_
@@ -61,7 +62,7 @@ struct iio_trigger {
 
 	struct list_head		list;
 	struct list_head		alloc_list;
-	int use_count;
+	atomic_t			use_count;
 
 	struct irq_chip			subirq_chip;
 	int				subirq_base;
-- 
cgit v1.2.3


From b1451e546899bc8f450773b2af02e0cd000cf1fa Mon Sep 17 00:00:00 2001
From: "Patil, Rachna" <rachna@ti.com>
Date: Sat, 20 Jul 2013 17:27:00 +0100
Subject: iio: ti_am335x_adc: Fix wrong samples received on 1st read

Previously we tried to read data form ADC even before ADC sequencer
finished sampling. This led to wrong samples.
We now wait on ADC status register idle bit to be set.

Signed-off-by: Patil, Rachna <rachna@ti.com>
Signed-off-by: Zubair Lutfullah <zubair.lutfullah@gmail.com>
Signed-off-by: Jonathan Cameron <jic23@kernel.org>
---
 drivers/iio/adc/ti_am335x_adc.c      | 30 ++++++++++++++++++++++--------
 include/linux/mfd/ti_am335x_tscadc.h | 16 ++++++++++++++++
 2 files changed, 38 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/iio/adc/ti_am335x_adc.c b/drivers/iio/adc/ti_am335x_adc.c
index 0ad208a69c29..3ceac3e91dde 100644
--- a/drivers/iio/adc/ti_am335x_adc.c
+++ b/drivers/iio/adc/ti_am335x_adc.c
@@ -60,7 +60,6 @@ static void tiadc_step_config(struct tiadc_device *adc_dev)
 {
 	unsigned int stepconfig;
 	int i, steps;
-	u32 step_en;
 
 	/*
 	 * There are 16 configurable steps and 8 analog input
@@ -86,8 +85,7 @@ static void tiadc_step_config(struct tiadc_device *adc_dev)
 		adc_dev->channel_step[i] = steps;
 		steps++;
 	}
-	step_en = get_adc_step_mask(adc_dev);
-	am335x_tsc_se_set(adc_dev->mfd_tscadc, step_en);
+
 }
 
 static const char * const chan_name_ain[] = {
@@ -142,10 +140,22 @@ static int tiadc_read_raw(struct iio_dev *indio_dev,
 		int *val, int *val2, long mask)
 {
 	struct tiadc_device *adc_dev = iio_priv(indio_dev);
-	int i;
-	unsigned int fifo1count, read;
+	int i, map_val;
+	unsigned int fifo1count, read, stepid;
 	u32 step = UINT_MAX;
 	bool found = false;
+	u32 step_en;
+	unsigned long timeout = jiffies + usecs_to_jiffies
+				(IDLE_TIMEOUT * adc_dev->channels);
+	step_en = get_adc_step_mask(adc_dev);
+	am335x_tsc_se_set(adc_dev->mfd_tscadc, step_en);
+
+	/* Wait for ADC sequencer to complete sampling */
+	while (tiadc_readl(adc_dev, REG_ADCFSM) & SEQ_STATUS) {
+		if (time_after(jiffies, timeout))
+			return -EAGAIN;
+		}
+	map_val = chan->channel + TOTAL_CHANNELS;
 
 	/*
 	 * When the sub-system is first enabled,
@@ -170,12 +180,16 @@ static int tiadc_read_raw(struct iio_dev *indio_dev,
 	fifo1count = tiadc_readl(adc_dev, REG_FIFO1CNT);
 	for (i = 0; i < fifo1count; i++) {
 		read = tiadc_readl(adc_dev, REG_FIFO1);
-		if (read >> 16 == step) {
-			*val = read & 0xfff;
+		stepid = read & FIFOREAD_CHNLID_MASK;
+		stepid = stepid >> 0x10;
+
+		if (stepid == map_val) {
+			read = read & FIFOREAD_DATA_MASK;
 			found = true;
+			*val = read;
 		}
 	}
-	am335x_tsc_se_update(adc_dev->mfd_tscadc);
+
 	if (found == false)
 		return -EBUSY;
 	return IIO_VAL_INT;
diff --git a/include/linux/mfd/ti_am335x_tscadc.h b/include/linux/mfd/ti_am335x_tscadc.h
index 8d73fe29796a..db1791bb997a 100644
--- a/include/linux/mfd/ti_am335x_tscadc.h
+++ b/include/linux/mfd/ti_am335x_tscadc.h
@@ -113,11 +113,27 @@
 #define CNTRLREG_8WIRE		CNTRLREG_AFE_CTRL(3)
 #define CNTRLREG_TSCENB		BIT(7)
 
+/* FIFO READ Register */
+#define FIFOREAD_DATA_MASK (0xfff << 0)
+#define FIFOREAD_CHNLID_MASK (0xf << 16)
+
+/* Sequencer Status */
+#define SEQ_STATUS BIT(5)
+
 #define ADC_CLK			3000000
 #define	MAX_CLK_DIV		7
 #define TOTAL_STEPS		16
 #define TOTAL_CHANNELS		8
 
+/*
+* ADC runs at 3MHz, and it takes
+* 15 cycles to latch one data output.
+* Hence the idle time for ADC to
+* process one sample data would be
+* around 5 micro seconds.
+*/
+#define IDLE_TIMEOUT 5 /* microsec */
+
 #define TSCADC_CELLS		2
 
 struct ti_tscadc_dev {
-- 
cgit v1.2.3


From 2816c551c796ec14620325b2c9ed75b9979d3125 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Mon, 29 Jul 2013 19:50:33 +0200
Subject: tracing: trace_remove_event_call() should fail if call/file is in use

Change trace_remove_event_call(call) to return the error if this
call is active. This is what the callers assume but can't verify
outside of the tracing locks. Both trace_kprobe.c/trace_uprobe.c
need the additional changes, unregister_trace_probe() should abort
if trace_remove_event_call() fails.

The caller is going to free this call/file so we must ensure that
nobody can use them after trace_remove_event_call() succeeds.
debugfs should be fine after the previous changes and event_remove()
does TRACE_REG_UNREGISTER, but still there are 2 reasons why we need
the additional checks:

- There could be a perf_event(s) attached to this tp_event, so the
  patch checks ->perf_refcount.

- TRACE_REG_UNREGISTER can be suppressed by FTRACE_EVENT_FL_SOFT_MODE,
  so we simply check FTRACE_EVENT_FL_ENABLED protected by event_mutex.

Link: http://lkml.kernel.org/r/20130729175033.GB26284@redhat.com

Reviewed-by: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/ftrace_event.h |  2 +-
 kernel/trace/trace_events.c  | 35 +++++++++++++++++++++++++++++++++--
 2 files changed, 34 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index 4372658c73ae..f98ab063e95e 100644
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -332,7 +332,7 @@ extern int trace_define_field(struct ftrace_event_call *call, const char *type,
 			      const char *name, int offset, int size,
 			      int is_signed, int filter_type);
 extern int trace_add_event_call(struct ftrace_event_call *call);
-extern void trace_remove_event_call(struct ftrace_event_call *call);
+extern int trace_remove_event_call(struct ftrace_event_call *call);
 
 #define is_signed_type(type)	(((type)(-1)) < (type)1)
 
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index a67c913e2f9f..ec04836273c0 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -1713,16 +1713,47 @@ static void __trace_remove_event_call(struct ftrace_event_call *call)
 	destroy_preds(call);
 }
 
+static int probe_remove_event_call(struct ftrace_event_call *call)
+{
+	struct trace_array *tr;
+	struct ftrace_event_file *file;
+
+#ifdef CONFIG_PERF_EVENTS
+	if (call->perf_refcount)
+		return -EBUSY;
+#endif
+	do_for_each_event_file(tr, file) {
+		if (file->event_call != call)
+			continue;
+		/*
+		 * We can't rely on ftrace_event_enable_disable(enable => 0)
+		 * we are going to do, FTRACE_EVENT_FL_SOFT_MODE can suppress
+		 * TRACE_REG_UNREGISTER.
+		 */
+		if (file->flags & FTRACE_EVENT_FL_ENABLED)
+			return -EBUSY;
+		break;
+	} while_for_each_event_file();
+
+	__trace_remove_event_call(call);
+
+	return 0;
+}
+
 /* Remove an event_call */
-void trace_remove_event_call(struct ftrace_event_call *call)
+int trace_remove_event_call(struct ftrace_event_call *call)
 {
+	int ret;
+
 	mutex_lock(&trace_types_lock);
 	mutex_lock(&event_mutex);
 	down_write(&trace_event_sem);
-	__trace_remove_event_call(call);
+	ret = probe_remove_event_call(call);
 	up_write(&trace_event_sem);
 	mutex_unlock(&event_mutex);
 	mutex_unlock(&trace_types_lock);
+
+	return ret;
 }
 
 #define for_each_event(event, start, end)			\
-- 
cgit v1.2.3


From ed5467da0e369e65b247b99eb6403cb79172bcda Mon Sep 17 00:00:00 2001
From: Andrew Vagin <avagin@openvz.org>
Date: Fri, 2 Aug 2013 21:16:43 +0400
Subject: tracing: Fix fields of struct trace_iterator that are zeroed by
 mistake

tracing_read_pipe zeros all fields bellow "seq". The declaration contains
a comment about that, but it doesn't help.

The first field is "snapshot", it's true when current open file is
snapshot. Looks obvious, that it should not be zeroed.

The second field is "started". It was converted from cpumask_t to
cpumask_var_t (v2.6.28-4983-g4462344), in other words it was
converted from cpumask to pointer on cpumask.

Currently the reference on "started" memory is lost after the first read
from tracing_read_pipe and a proper object will never be freed.

The "started" is never dereferenced for trace_pipe, because trace_pipe
can't have the TRACE_FILE_ANNOTATE options.

Link: http://lkml.kernel.org/r/1375463803-3085183-1-git-send-email-avagin@openvz.org

Cc: stable@vger.kernel.org # 2.6.30
Signed-off-by: Andrew Vagin <avagin@openvz.org>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/ftrace_event.h | 10 ++++++----
 kernel/trace/trace.c         |  1 +
 2 files changed, 7 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index f98ab063e95e..120d57a1c3a5 100644
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -78,6 +78,11 @@ struct trace_iterator {
 	/* trace_seq for __print_flags() and __print_symbolic() etc. */
 	struct trace_seq	tmp_seq;
 
+	cpumask_var_t		started;
+
+	/* it's true when current open file is snapshot */
+	bool			snapshot;
+
 	/* The below is zeroed out in pipe_read */
 	struct trace_seq	seq;
 	struct trace_entry	*ent;
@@ -90,10 +95,7 @@ struct trace_iterator {
 	loff_t			pos;
 	long			idx;
 
-	cpumask_var_t		started;
-
-	/* it's true when current open file is snapshot */
-	bool			snapshot;
+	/* All new field here will be zeroed out in pipe_read */
 };
 
 enum trace_iter_flags {
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 882ec1dd1515..f5b35a5e852f 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -4151,6 +4151,7 @@ waitagain:
 	memset(&iter->seq, 0,
 	       sizeof(struct trace_iterator) -
 	       offsetof(struct trace_iterator, seq));
+	cpumask_clear(iter->started);
 	iter->pos = -1;
 
 	trace_event_read_lock();
-- 
cgit v1.2.3


From e67bc51e574ffe3c4bc1e09cab7658b1e780b4ce Mon Sep 17 00:00:00 2001
From: Dhaval Giani <dhaval.giani@gmail.com>
Date: Fri, 2 Aug 2013 14:47:29 -0400
Subject: tracing: Fix trace_dump_stack() proto when CONFIG_TRACING is not set

When CONFIG_TRACING is not enabled, the stub prototype for trace_dump_stack()
is incorrect. It has (void) when it should be (int).

Link: http://lkml.kernel.org/r/CAPhKKr_H=ukFnBL4WgDOVT5ay2xeF-Ho+CA0DWZX0E2JW-=vSQ@mail.gmail.com

Signed-off-by: Dhaval Giani <dhaval.giani@gmail.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/kernel.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 3bef14c6586b..482ad2d84a32 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -629,7 +629,7 @@ extern void ftrace_dump(enum ftrace_dump_mode oops_dump_mode);
 static inline void tracing_start(void) { }
 static inline void tracing_stop(void) { }
 static inline void ftrace_off_permanent(void) { }
-static inline void trace_dump_stack(void) { }
+static inline void trace_dump_stack(int skip) { }
 
 static inline void tracing_on(void) { }
 static inline void tracing_off(void) { }
-- 
cgit v1.2.3


From 7d46daba8dd5df1aa45724518a041ef7163d3ad5 Mon Sep 17 00:00:00 2001
From: Eli Cohen <eli@dev.mellanox.co.il>
Date: Mon, 5 Aug 2013 16:05:32 +0300
Subject: mlx5: remove health handler plugin

Remove this code, per Dave Miller's request, since it is not being used
anywhere in the kernel.

Signed-off-by: Eli Cohen <eli@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx5/core/health.c | 29 +-----------------------
 include/linux/mlx5/driver.h                      |  3 ---
 2 files changed, 1 insertion(+), 31 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c
index 748f10a155c4..3e6670c4a7cd 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/health.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c
@@ -55,33 +55,9 @@ enum {
 };
 
 static DEFINE_SPINLOCK(health_lock);
-
 static LIST_HEAD(health_list);
 static struct work_struct health_work;
 
-static health_handler_t reg_handler;
-int mlx5_register_health_report_handler(health_handler_t handler)
-{
-	spin_lock_irq(&health_lock);
-	if (reg_handler) {
-		spin_unlock_irq(&health_lock);
-		return -EEXIST;
-	}
-	reg_handler = handler;
-	spin_unlock_irq(&health_lock);
-
-	return 0;
-}
-EXPORT_SYMBOL(mlx5_register_health_report_handler);
-
-void mlx5_unregister_health_report_handler(void)
-{
-	spin_lock_irq(&health_lock);
-	reg_handler = NULL;
-	spin_unlock_irq(&health_lock);
-}
-EXPORT_SYMBOL(mlx5_unregister_health_report_handler);
-
 static void health_care(struct work_struct *work)
 {
 	struct mlx5_core_health *health, *n;
@@ -98,11 +74,8 @@ static void health_care(struct work_struct *work)
 		priv = container_of(health, struct mlx5_priv, health);
 		dev = container_of(priv, struct mlx5_core_dev, priv);
 		mlx5_core_warn(dev, "handling bad device here\n");
+		/* nothing yet */
 		spin_lock_irq(&health_lock);
-		if (reg_handler)
-			reg_handler(dev->pdev, health->health,
-				    sizeof(health->health));
-
 		list_del_init(&health->list);
 		spin_unlock_irq(&health_lock);
 	}
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 2aa258b0ced1..611e65e76b00 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -731,9 +731,6 @@ void mlx5_cq_debugfs_cleanup(struct mlx5_core_dev *dev);
 int mlx5_db_alloc(struct mlx5_core_dev *dev, struct mlx5_db *db);
 void mlx5_db_free(struct mlx5_core_dev *dev, struct mlx5_db *db);
 
-typedef void (*health_handler_t)(struct pci_dev *pdev, struct health_buffer __iomem *buf, int size);
-int mlx5_register_health_report_handler(health_handler_t handler);
-void mlx5_unregister_health_report_handler(void);
 const char *mlx5_command_str(int command);
 int mlx5_cmdif_debugfs_init(struct mlx5_core_dev *dev);
 void mlx5_cmdif_debugfs_cleanup(struct mlx5_core_dev *dev);
-- 
cgit v1.2.3


From 49ccc142f9cbc33fdda18e8fa90c1c5b4a79c0ad Mon Sep 17 00:00:00 2001
From: Mateusz Krawczuk <m.krawczuk@partner.samsung.com>
Date: Tue, 6 Aug 2013 18:34:40 +0200
Subject: regmap: Add missing header for !CONFIG_REGMAP stubs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

regmap.h requires linux/err.h if CONFIG_REGMAP is not defined. Without it I get
error.
CC      drivers/media/platform/exynos4-is/fimc-reg.o
In file included from drivers/media/platform/exynos4-is/fimc-reg.c:14:0:
include/linux/regmap.h: In function ‘regmap_write’:
include/linux/regmap.h:525:10: error: ‘EINVAL’ undeclared (first use in this function)
include/linux/regmap.h:525:10: note: each undeclared identifier is reported only once for each function it appears in

Signed-off-by: Mateusz Krawczuk <m.krawczuk@partner.samsung.com>
Signed-off-by: Kyungmin Park <kyungmin.park@samsung.com>
Signed-off-by: Mark Brown <broonie@linaro.org>
Cc: stable@kernel.org
---
 include/linux/regmap.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/regmap.h b/include/linux/regmap.h
index 75981d0b57dc..580a5320cc96 100644
--- a/include/linux/regmap.h
+++ b/include/linux/regmap.h
@@ -15,6 +15,7 @@
 
 #include <linux/list.h>
 #include <linux/rbtree.h>
+#include <linux/err.h>
 
 struct module;
 struct device;
-- 
cgit v1.2.3


From 786615bc1ce84150ded80daea6bd9f6297f48e73 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Mon, 5 Aug 2013 16:04:47 -0400
Subject: SUNRPC: If the rpcbind channel is disconnected, fail the call to
 unregister

If rpcbind causes our connection to the AF_LOCAL socket to close after
we've registered a service, then we want to be careful about reconnecting
since the mount namespace may have changed.

By simply refusing to reconnect the AF_LOCAL socket in the case of
unregister, we avoid the need to somehow save the mount namespace. While
this may lead to some services not unregistering properly, it should
be safe.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
Cc: Nix <nix@esperi.org.uk>
Cc: Jeff Layton <jlayton@redhat.com>
Cc: stable@vger.kernel.org # 3.9.x
---
 include/linux/sunrpc/sched.h |  1 +
 net/sunrpc/clnt.c            |  4 ++++
 net/sunrpc/netns.h           |  1 +
 net/sunrpc/rpcb_clnt.c       | 40 +++++++++++++++++++++++++++-------------
 4 files changed, 33 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h
index 6d870353674a..1821445708d6 100644
--- a/include/linux/sunrpc/sched.h
+++ b/include/linux/sunrpc/sched.h
@@ -121,6 +121,7 @@ struct rpc_task_setup {
 #define RPC_TASK_SOFTCONN	0x0400		/* Fail if can't connect */
 #define RPC_TASK_SENT		0x0800		/* message was sent */
 #define RPC_TASK_TIMEOUT	0x1000		/* fail with ETIMEDOUT on timeout */
+#define RPC_TASK_NOCONNECT	0x2000		/* return ENOTCONN if not connected */
 
 #define RPC_IS_ASYNC(t)		((t)->tk_flags & RPC_TASK_ASYNC)
 #define RPC_IS_SWAPPER(t)	((t)->tk_flags & RPC_TASK_SWAPPER)
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 74f6a704e374..ecbc4e3d83ad 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -1660,6 +1660,10 @@ call_connect(struct rpc_task *task)
 		task->tk_action = call_connect_status;
 		if (task->tk_status < 0)
 			return;
+		if (task->tk_flags & RPC_TASK_NOCONNECT) {
+			rpc_exit(task, -ENOTCONN);
+			return;
+		}
 		xprt_connect(task);
 	}
 }
diff --git a/net/sunrpc/netns.h b/net/sunrpc/netns.h
index 74d948f5d5a1..779742cfc1ff 100644
--- a/net/sunrpc/netns.h
+++ b/net/sunrpc/netns.h
@@ -23,6 +23,7 @@ struct sunrpc_net {
 	struct rpc_clnt *rpcb_local_clnt4;
 	spinlock_t rpcb_clnt_lock;
 	unsigned int rpcb_users;
+	unsigned int rpcb_is_af_local : 1;
 
 	struct mutex gssp_lock;
 	wait_queue_head_t gssp_wq;
diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c
index b0f723227157..1891a1022c17 100644
--- a/net/sunrpc/rpcb_clnt.c
+++ b/net/sunrpc/rpcb_clnt.c
@@ -204,13 +204,15 @@ void rpcb_put_local(struct net *net)
 }
 
 static void rpcb_set_local(struct net *net, struct rpc_clnt *clnt,
-			struct rpc_clnt *clnt4)
+			struct rpc_clnt *clnt4,
+			bool is_af_local)
 {
 	struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
 
 	/* Protected by rpcb_create_local_mutex */
 	sn->rpcb_local_clnt = clnt;
 	sn->rpcb_local_clnt4 = clnt4;
+	sn->rpcb_is_af_local = is_af_local ? 1 : 0;
 	smp_wmb(); 
 	sn->rpcb_users = 1;
 	dprintk("RPC:       created new rpcb local clients (rpcb_local_clnt: "
@@ -271,7 +273,7 @@ static int rpcb_create_local_unix(struct net *net)
 		clnt4 = NULL;
 	}
 
-	rpcb_set_local(net, clnt, clnt4);
+	rpcb_set_local(net, clnt, clnt4, true);
 
 out:
 	return result;
@@ -323,7 +325,7 @@ static int rpcb_create_local_net(struct net *net)
 		clnt4 = NULL;
 	}
 
-	rpcb_set_local(net, clnt, clnt4);
+	rpcb_set_local(net, clnt, clnt4, false);
 
 out:
 	return result;
@@ -384,13 +386,16 @@ static struct rpc_clnt *rpcb_create(struct net *net, const char *hostname,
 	return rpc_create(&args);
 }
 
-static int rpcb_register_call(struct rpc_clnt *clnt, struct rpc_message *msg)
+static int rpcb_register_call(struct sunrpc_net *sn, struct rpc_clnt *clnt, struct rpc_message *msg, bool is_set)
 {
-	int result, error = 0;
+	int flags = RPC_TASK_NOCONNECT;
+	int error, result = 0;
 
+	if (is_set || !sn->rpcb_is_af_local)
+		flags = RPC_TASK_SOFTCONN;
 	msg->rpc_resp = &result;
 
-	error = rpc_call_sync(clnt, msg, RPC_TASK_SOFTCONN);
+	error = rpc_call_sync(clnt, msg, flags);
 	if (error < 0) {
 		dprintk("RPC:       failed to contact local rpcbind "
 				"server (errno %d).\n", -error);
@@ -447,16 +452,19 @@ int rpcb_register(struct net *net, u32 prog, u32 vers, int prot, unsigned short
 		.rpc_argp	= &map,
 	};
 	struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
+	bool is_set = false;
 
 	dprintk("RPC:       %sregistering (%u, %u, %d, %u) with local "
 			"rpcbind\n", (port ? "" : "un"),
 			prog, vers, prot, port);
 
 	msg.rpc_proc = &rpcb_procedures2[RPCBPROC_UNSET];
-	if (port)
+	if (port != 0) {
 		msg.rpc_proc = &rpcb_procedures2[RPCBPROC_SET];
+		is_set = true;
+	}
 
-	return rpcb_register_call(sn->rpcb_local_clnt, &msg);
+	return rpcb_register_call(sn, sn->rpcb_local_clnt, &msg, is_set);
 }
 
 /*
@@ -469,6 +477,7 @@ static int rpcb_register_inet4(struct sunrpc_net *sn,
 	const struct sockaddr_in *sin = (const struct sockaddr_in *)sap;
 	struct rpcbind_args *map = msg->rpc_argp;
 	unsigned short port = ntohs(sin->sin_port);
+	bool is_set = false;
 	int result;
 
 	map->r_addr = rpc_sockaddr2uaddr(sap, GFP_KERNEL);
@@ -479,10 +488,12 @@ static int rpcb_register_inet4(struct sunrpc_net *sn,
 			map->r_addr, map->r_netid);
 
 	msg->rpc_proc = &rpcb_procedures4[RPCBPROC_UNSET];
-	if (port)
+	if (port != 0) {
 		msg->rpc_proc = &rpcb_procedures4[RPCBPROC_SET];
+		is_set = true;
+	}
 
-	result = rpcb_register_call(sn->rpcb_local_clnt4, msg);
+	result = rpcb_register_call(sn, sn->rpcb_local_clnt4, msg, is_set);
 	kfree(map->r_addr);
 	return result;
 }
@@ -497,6 +508,7 @@ static int rpcb_register_inet6(struct sunrpc_net *sn,
 	const struct sockaddr_in6 *sin6 = (const struct sockaddr_in6 *)sap;
 	struct rpcbind_args *map = msg->rpc_argp;
 	unsigned short port = ntohs(sin6->sin6_port);
+	bool is_set = false;
 	int result;
 
 	map->r_addr = rpc_sockaddr2uaddr(sap, GFP_KERNEL);
@@ -507,10 +519,12 @@ static int rpcb_register_inet6(struct sunrpc_net *sn,
 			map->r_addr, map->r_netid);
 
 	msg->rpc_proc = &rpcb_procedures4[RPCBPROC_UNSET];
-	if (port)
+	if (port != 0) {
 		msg->rpc_proc = &rpcb_procedures4[RPCBPROC_SET];
+		is_set = true;
+	}
 
-	result = rpcb_register_call(sn->rpcb_local_clnt4, msg);
+	result = rpcb_register_call(sn, sn->rpcb_local_clnt4, msg, is_set);
 	kfree(map->r_addr);
 	return result;
 }
@@ -527,7 +541,7 @@ static int rpcb_unregister_all_protofamilies(struct sunrpc_net *sn,
 	map->r_addr = "";
 	msg->rpc_proc = &rpcb_procedures4[RPCBPROC_UNSET];
 
-	return rpcb_register_call(sn->rpcb_local_clnt4, msg);
+	return rpcb_register_call(sn, sn->rpcb_local_clnt4, msg, false);
 }
 
 /**
-- 
cgit v1.2.3


From 8742f229b635bf1c1c84a3dfe5e47c814c20b5c8 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Thu, 8 Aug 2013 18:55:32 +0200
Subject: userns: limit the maximum depth of user_namespace->parent chain

Ensure that user_namespace->parent chain can't grow too much.
Currently we use the hardroded 32 as limit.

Reported-by: Andy Lutomirski <luto@amacapital.net>
Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/user_namespace.h | 1 +
 kernel/user_namespace.c        | 4 ++++
 2 files changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
index b6b215f13b45..14105c26a836 100644
--- a/include/linux/user_namespace.h
+++ b/include/linux/user_namespace.h
@@ -23,6 +23,7 @@ struct user_namespace {
 	struct uid_gid_map	projid_map;
 	atomic_t		count;
 	struct user_namespace	*parent;
+	int			level;
 	kuid_t			owner;
 	kgid_t			group;
 	unsigned int		proc_inum;
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index 6e50a44610ee..9064b919a406 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -62,6 +62,9 @@ int create_user_ns(struct cred *new)
 	kgid_t group = new->egid;
 	int ret;
 
+	if (parent_ns->level > 32)
+		return -EUSERS;
+
 	/*
 	 * Verify that we can not violate the policy of which files
 	 * may be accessed that is specified by the root directory,
@@ -92,6 +95,7 @@ int create_user_ns(struct cred *new)
 	atomic_set(&ns->count, 1);
 	/* Leave the new->user_ns reference with the new user namespace. */
 	ns->parent = parent_ns;
+	ns->level = parent_ns->level + 1;
 	ns->owner = owner;
 	ns->group = group;
 
-- 
cgit v1.2.3


From e0acd0a68ec7dbf6b7a81a87a867ebd7ac9b76c4 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Mon, 12 Aug 2013 18:14:00 +0200
Subject: sched: fix the theoretical signal_wake_up() vs schedule() race

This is only theoretical, but after try_to_wake_up(p) was changed
to check p->state under p->pi_lock the code like

	__set_current_state(TASK_INTERRUPTIBLE);
	schedule();

can miss a signal. This is the special case of wait-for-condition,
it relies on try_to_wake_up/schedule interaction and thus it does
not need mb() between __set_current_state() and if(signal_pending).

However, this __set_current_state() can move into the critical
section protected by rq->lock, now that try_to_wake_up() takes
another lock we need to ensure that it can't be reordered with
"if (signal_pending(current))" check inside that section.

The patch is actually one-liner, it simply adds smp_wmb() before
spin_lock_irq(rq->lock). This is what try_to_wake_up() already
does by the same reason.

We turn this wmb() into the new helper, smp_mb__before_spinlock(),
for better documentation and to allow the architectures to change
the default implementation.

While at it, kill smp_mb__after_lock(), it has no callers.

Perhaps we can also add smp_mb__before/after_spinunlock() for
prepare_to_wait().

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/include/asm/spinlock.h |  4 ----
 include/linux/spinlock.h        | 14 +++++++++++---
 kernel/sched/core.c             | 14 +++++++++++++-
 3 files changed, 24 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/include/asm/spinlock.h b/arch/x86/include/asm/spinlock.h
index 33692eaabab5..e3ddd7db723f 100644
--- a/arch/x86/include/asm/spinlock.h
+++ b/arch/x86/include/asm/spinlock.h
@@ -233,8 +233,4 @@ static inline void arch_write_unlock(arch_rwlock_t *rw)
 #define arch_read_relax(lock)	cpu_relax()
 #define arch_write_relax(lock)	cpu_relax()
 
-/* The {read|write|spin}_lock() on x86 are full memory barriers. */
-static inline void smp_mb__after_lock(void) { }
-#define ARCH_HAS_SMP_MB_AFTER_LOCK
-
 #endif /* _ASM_X86_SPINLOCK_H */
diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h
index 7d537ced949a..75f34949d9ab 100644
--- a/include/linux/spinlock.h
+++ b/include/linux/spinlock.h
@@ -117,9 +117,17 @@ do {								\
 #endif /*arch_spin_is_contended*/
 #endif
 
-/* The lock does not imply full memory barrier. */
-#ifndef ARCH_HAS_SMP_MB_AFTER_LOCK
-static inline void smp_mb__after_lock(void) { smp_mb(); }
+/*
+ * Despite its name it doesn't necessarily has to be a full barrier.
+ * It should only guarantee that a STORE before the critical section
+ * can not be reordered with a LOAD inside this section.
+ * spin_lock() is the one-way barrier, this LOAD can not escape out
+ * of the region. So the default implementation simply ensures that
+ * a STORE can not move into the critical section, smp_wmb() should
+ * serialize it with another STORE done by spin_lock().
+ */
+#ifndef smp_mb__before_spinlock
+#define smp_mb__before_spinlock()	smp_wmb()
 #endif
 
 /**
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index b7c32cb7bfeb..ef51b0ef4bdc 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1491,7 +1491,13 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
 	unsigned long flags;
 	int cpu, success = 0;
 
-	smp_wmb();
+	/*
+	 * If we are going to wake up a thread waiting for CONDITION we
+	 * need to ensure that CONDITION=1 done by the caller can not be
+	 * reordered with p->state check below. This pairs with mb() in
+	 * set_current_state() the waiting thread does.
+	 */
+	smp_mb__before_spinlock();
 	raw_spin_lock_irqsave(&p->pi_lock, flags);
 	if (!(p->state & state))
 		goto out;
@@ -2394,6 +2400,12 @@ need_resched:
 	if (sched_feat(HRTICK))
 		hrtick_clear(rq);
 
+	/*
+	 * Make sure that signal_pending_state()->signal_pending() below
+	 * can't be reordered with __set_current_state(TASK_INTERRUPTIBLE)
+	 * done by the caller to avoid the race with signal_wake_up().
+	 */
+	smp_mb__before_spinlock();
 	raw_spin_lock_irq(&rq->lock);
 
 	switch_count = &prev->nivcsw;
-- 
cgit v1.2.3


From 179ef71cbc085252e3fe6b8159263a7ed1d88ea4 Mon Sep 17 00:00:00 2001
From: Cyrill Gorcunov <gorcunov@gmail.com>
Date: Tue, 13 Aug 2013 16:00:49 -0700
Subject: mm: save soft-dirty bits on swapped pages

Andy Lutomirski reported that if a page with _PAGE_SOFT_DIRTY bit set
get swapped out, the bit is getting lost and no longer available when
pte read back.

To resolve this we introduce _PTE_SWP_SOFT_DIRTY bit which is saved in
pte entry for the page being swapped out.  When such page is to be read
back from a swap cache we check for bit presence and if it's there we
clear it and restore the former _PAGE_SOFT_DIRTY bit back.

One of the problem was to find a place in pte entry where we can save
the _PTE_SWP_SOFT_DIRTY bit while page is in swap.  The _PAGE_PSE was
chosen for that, it doesn't intersect with swap entry format stored in
pte.

Reported-by: Andy Lutomirski <luto@amacapital.net>
Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
Acked-by: Pavel Emelyanov <xemul@parallels.com>
Cc: Matt Mackall <mpm@selenic.com>
Cc: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@gmail.com>
Cc: Stephen Rothwell <sfr@canb.auug.org.au>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Reviewed-by: Minchan Kim <minchan@kernel.org>
Reviewed-by: Wanpeng Li <liwanp@linux.vnet.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/include/asm/pgtable.h       | 15 +++++++++++++++
 arch/x86/include/asm/pgtable_types.h | 13 +++++++++++++
 fs/proc/task_mmu.c                   | 21 +++++++++++++++------
 include/asm-generic/pgtable.h        | 15 +++++++++++++++
 include/linux/swapops.h              |  2 ++
 mm/memory.c                          |  2 ++
 mm/rmap.c                            |  6 +++++-
 mm/swapfile.c                        | 19 +++++++++++++++++--
 8 files changed, 84 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 7dc305a46058..bd0518a7f197 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -314,6 +314,21 @@ static inline pmd_t pmd_mksoft_dirty(pmd_t pmd)
 	return pmd_set_flags(pmd, _PAGE_SOFT_DIRTY);
 }
 
+static inline pte_t pte_swp_mksoft_dirty(pte_t pte)
+{
+	return pte_set_flags(pte, _PAGE_SWP_SOFT_DIRTY);
+}
+
+static inline int pte_swp_soft_dirty(pte_t pte)
+{
+	return pte_flags(pte) & _PAGE_SWP_SOFT_DIRTY;
+}
+
+static inline pte_t pte_swp_clear_soft_dirty(pte_t pte)
+{
+	return pte_clear_flags(pte, _PAGE_SWP_SOFT_DIRTY);
+}
+
 /*
  * Mask out unsupported bits in a present pgprot.  Non-present pgprots
  * can use those bits for other purposes, so leave them be.
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index c98ac63aae48..5e8442f178f9 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -67,6 +67,19 @@
 #define _PAGE_SOFT_DIRTY	(_AT(pteval_t, 0))
 #endif
 
+/*
+ * Tracking soft dirty bit when a page goes to a swap is tricky.
+ * We need a bit which can be stored in pte _and_ not conflict
+ * with swap entry format. On x86 bits 6 and 7 are *not* involved
+ * into swap entry computation, but bit 6 is used for nonlinear
+ * file mapping, so we borrow bit 7 for soft dirty tracking.
+ */
+#ifdef CONFIG_MEM_SOFT_DIRTY
+#define _PAGE_SWP_SOFT_DIRTY	_PAGE_PSE
+#else
+#define _PAGE_SWP_SOFT_DIRTY	(_AT(pteval_t, 0))
+#endif
+
 #if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
 #define _PAGE_NX	(_AT(pteval_t, 1) << _PAGE_BIT_NX)
 #else
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index dbf61f6174f0..e2d9bdce5e7e 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -730,8 +730,14 @@ static inline void clear_soft_dirty(struct vm_area_struct *vma,
 	 * of how soft-dirty works.
 	 */
 	pte_t ptent = *pte;
-	ptent = pte_wrprotect(ptent);
-	ptent = pte_clear_flags(ptent, _PAGE_SOFT_DIRTY);
+
+	if (pte_present(ptent)) {
+		ptent = pte_wrprotect(ptent);
+		ptent = pte_clear_flags(ptent, _PAGE_SOFT_DIRTY);
+	} else if (is_swap_pte(ptent)) {
+		ptent = pte_swp_clear_soft_dirty(ptent);
+	}
+
 	set_pte_at(vma->vm_mm, addr, pte, ptent);
 #endif
 }
@@ -752,14 +758,15 @@ static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr,
 	pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
 	for (; addr != end; pte++, addr += PAGE_SIZE) {
 		ptent = *pte;
-		if (!pte_present(ptent))
-			continue;
 
 		if (cp->type == CLEAR_REFS_SOFT_DIRTY) {
 			clear_soft_dirty(vma, addr, pte);
 			continue;
 		}
 
+		if (!pte_present(ptent))
+			continue;
+
 		page = vm_normal_page(vma, addr, ptent);
 		if (!page)
 			continue;
@@ -930,8 +937,10 @@ static void pte_to_pagemap_entry(pagemap_entry_t *pme, struct pagemapread *pm,
 		flags = PM_PRESENT;
 		page = vm_normal_page(vma, addr, pte);
 	} else if (is_swap_pte(pte)) {
-		swp_entry_t entry = pte_to_swp_entry(pte);
-
+		swp_entry_t entry;
+		if (pte_swp_soft_dirty(pte))
+			flags2 |= __PM_SOFT_DIRTY;
+		entry = pte_to_swp_entry(pte);
 		frame = swp_type(entry) |
 			(swp_offset(entry) << MAX_SWAPFILES_SHIFT);
 		flags = PM_SWAP;
diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h
index 2f47ade1b567..2a7e0d10ad9a 100644
--- a/include/asm-generic/pgtable.h
+++ b/include/asm-generic/pgtable.h
@@ -417,6 +417,21 @@ static inline pmd_t pmd_mksoft_dirty(pmd_t pmd)
 {
 	return pmd;
 }
+
+static inline pte_t pte_swp_mksoft_dirty(pte_t pte)
+{
+	return pte;
+}
+
+static inline int pte_swp_soft_dirty(pte_t pte)
+{
+	return 0;
+}
+
+static inline pte_t pte_swp_clear_soft_dirty(pte_t pte)
+{
+	return pte;
+}
 #endif
 
 #ifndef __HAVE_PFNMAP_TRACKING
diff --git a/include/linux/swapops.h b/include/linux/swapops.h
index c5fd30d2a415..8d4fa82bfb91 100644
--- a/include/linux/swapops.h
+++ b/include/linux/swapops.h
@@ -67,6 +67,8 @@ static inline swp_entry_t pte_to_swp_entry(pte_t pte)
 	swp_entry_t arch_entry;
 
 	BUG_ON(pte_file(pte));
+	if (pte_swp_soft_dirty(pte))
+		pte = pte_swp_clear_soft_dirty(pte);
 	arch_entry = __pte_to_swp_entry(pte);
 	return swp_entry(__swp_type(arch_entry), __swp_offset(arch_entry));
 }
diff --git a/mm/memory.c b/mm/memory.c
index 1ce2e2a734fc..e98ecad2b9c8 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3115,6 +3115,8 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		exclusive = 1;
 	}
 	flush_icache_page(vma, page);
+	if (pte_swp_soft_dirty(orig_pte))
+		pte = pte_mksoft_dirty(pte);
 	set_pte_at(mm, address, page_table, pte);
 	if (page == swapcache)
 		do_page_add_anon_rmap(page, vma, address, exclusive);
diff --git a/mm/rmap.c b/mm/rmap.c
index cd356df4f71a..83325b80142b 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1236,6 +1236,7 @@ int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
 			   swp_entry_to_pte(make_hwpoison_entry(page)));
 	} else if (PageAnon(page)) {
 		swp_entry_t entry = { .val = page_private(page) };
+		pte_t swp_pte;
 
 		if (PageSwapCache(page)) {
 			/*
@@ -1264,7 +1265,10 @@ int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
 			BUG_ON(TTU_ACTION(flags) != TTU_MIGRATION);
 			entry = make_migration_entry(page, pte_write(pteval));
 		}
-		set_pte_at(mm, address, pte, swp_entry_to_pte(entry));
+		swp_pte = swp_entry_to_pte(entry);
+		if (pte_soft_dirty(pteval))
+			swp_pte = pte_swp_mksoft_dirty(swp_pte);
+		set_pte_at(mm, address, pte, swp_pte);
 		BUG_ON(pte_file(*pte));
 	} else if (IS_ENABLED(CONFIG_MIGRATION) &&
 		   (TTU_ACTION(flags) == TTU_MIGRATION)) {
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 36af6eeaa67e..6cf2e60983b7 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -866,6 +866,21 @@ unsigned int count_swap_pages(int type, int free)
 }
 #endif /* CONFIG_HIBERNATION */
 
+static inline int maybe_same_pte(pte_t pte, pte_t swp_pte)
+{
+#ifdef CONFIG_MEM_SOFT_DIRTY
+	/*
+	 * When pte keeps soft dirty bit the pte generated
+	 * from swap entry does not has it, still it's same
+	 * pte from logical point of view.
+	 */
+	pte_t swp_pte_dirty = pte_swp_mksoft_dirty(swp_pte);
+	return pte_same(pte, swp_pte) || pte_same(pte, swp_pte_dirty);
+#else
+	return pte_same(pte, swp_pte);
+#endif
+}
+
 /*
  * No need to decide whether this PTE shares the swap entry with others,
  * just let do_wp_page work it out if a write is requested later - to
@@ -892,7 +907,7 @@ static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd,
 	}
 
 	pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
-	if (unlikely(!pte_same(*pte, swp_entry_to_pte(entry)))) {
+	if (unlikely(!maybe_same_pte(*pte, swp_entry_to_pte(entry)))) {
 		mem_cgroup_cancel_charge_swapin(memcg);
 		ret = 0;
 		goto out;
@@ -947,7 +962,7 @@ static int unuse_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
 		 * swapoff spends a _lot_ of time in this loop!
 		 * Test inline before going to call unuse_pte.
 		 */
-		if (unlikely(pte_same(*pte, swp_pte))) {
+		if (unlikely(maybe_same_pte(*pte, swp_pte))) {
 			pte_unmap(pte);
 			ret = unuse_pte(vma, pmd, addr, entry, page);
 			if (ret)
-- 
cgit v1.2.3


From dfa9771a7c4784bafd0673bc7abcee3813088b77 Mon Sep 17 00:00:00 2001
From: Michal Simek <michal.simek@xilinx.com>
Date: Tue, 13 Aug 2013 16:00:53 -0700
Subject: microblaze: fix clone syscall

Fix inadvertent breakage in the clone syscall ABI for Microblaze that
was introduced in commit f3268edbe6fe ("microblaze: switch to generic
fork/vfork/clone").

The Microblaze syscall ABI for clone takes the parent tid address in the
4th argument; the third argument slot is used for the stack size.  The
incorrectly-used CLONE_BACKWARDS type assigned parent tid to the 3rd
slot.

This commit restores the original ABI so that existing userspace libc
code will work correctly.

All kernel versions from v3.8-rc1 were affected.

Signed-off-by: Michal Simek <michal.simek@xilinx.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/Kconfig             | 6 ++++++
 arch/microblaze/Kconfig  | 2 +-
 include/linux/syscalls.h | 5 +++++
 kernel/fork.c            | 6 ++++++
 4 files changed, 18 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/arch/Kconfig b/arch/Kconfig
index 8d2ae24b9f4a..1feb169274fe 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -407,6 +407,12 @@ config CLONE_BACKWARDS2
 	help
 	  Architecture has the first two arguments of clone(2) swapped.
 
+config CLONE_BACKWARDS3
+	bool
+	help
+	  Architecture has tls passed as the 3rd argument of clone(2),
+	  not the 5th one.
+
 config ODD_RT_SIGACTION
 	bool
 	help
diff --git a/arch/microblaze/Kconfig b/arch/microblaze/Kconfig
index d22a4ecffff4..4fab52294d98 100644
--- a/arch/microblaze/Kconfig
+++ b/arch/microblaze/Kconfig
@@ -28,7 +28,7 @@ config MICROBLAZE
 	select GENERIC_CLOCKEVENTS
 	select GENERIC_IDLE_POLL_SETUP
 	select MODULES_USE_ELF_RELA
-	select CLONE_BACKWARDS
+	select CLONE_BACKWARDS3
 
 config SWAP
 	def_bool n
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 4147d700a293..84662ecc7b51 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -802,9 +802,14 @@ asmlinkage long sys_vfork(void);
 asmlinkage long sys_clone(unsigned long, unsigned long, int __user *, int,
 	       int __user *);
 #else
+#ifdef CONFIG_CLONE_BACKWARDS3
+asmlinkage long sys_clone(unsigned long, unsigned long, int, int __user *,
+			  int __user *, int);
+#else
 asmlinkage long sys_clone(unsigned long, unsigned long, int __user *,
 	       int __user *, int);
 #endif
+#endif
 
 asmlinkage long sys_execve(const char __user *filename,
 		const char __user *const __user *argv,
diff --git a/kernel/fork.c b/kernel/fork.c
index 403d2bb8a968..e23bb19e2a3e 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1679,6 +1679,12 @@ SYSCALL_DEFINE5(clone, unsigned long, newsp, unsigned long, clone_flags,
 		 int __user *, parent_tidptr,
 		 int __user *, child_tidptr,
 		 int, tls_val)
+#elif defined(CONFIG_CLONE_BACKWARDS3)
+SYSCALL_DEFINE6(clone, unsigned long, clone_flags, unsigned long, newsp,
+		int, stack_size,
+		int __user *, parent_tidptr,
+		int __user *, child_tidptr,
+		int, tls_val)
 #else
 SYSCALL_DEFINE5(clone, unsigned long, clone_flags, unsigned long, newsp,
 		 int __user *, parent_tidptr,
-- 
cgit v1.2.3


From df54d6fa54275ce59660453e29d1228c2b45a826 Mon Sep 17 00:00:00 2001
From: Radu Caragea <sinaelgl@gmail.com>
Date: Tue, 13 Aug 2013 16:00:59 -0700
Subject: x86 get_unmapped_area(): use proper mmap base for bottom-up direction

When the stack is set to unlimited, the bottomup direction is used for
mmap-ings but the mmap_base is not used and thus effectively renders
ASLR for mmapings along with PIE useless.

Cc: Michel Lespinasse <walken@google.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Reviewed-by: Rik van Riel <riel@redhat.com>
Acked-by: Ingo Molnar <mingo@kernel.org>
Cc: Adrian Sendroiu <molecula2788@gmail.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/kernel/sys_x86_64.c | 2 +-
 arch/x86/mm/mmap.c           | 2 +-
 include/linux/sched.h        | 1 +
 3 files changed, 3 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/kernel/sys_x86_64.c b/arch/x86/kernel/sys_x86_64.c
index dbded5aedb81..48f8375e4c6b 100644
--- a/arch/x86/kernel/sys_x86_64.c
+++ b/arch/x86/kernel/sys_x86_64.c
@@ -101,7 +101,7 @@ static void find_start_end(unsigned long flags, unsigned long *begin,
 				*begin = new_begin;
 		}
 	} else {
-		*begin = TASK_UNMAPPED_BASE;
+		*begin = mmap_legacy_base();
 		*end = TASK_SIZE;
 	}
 }
diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c
index 62c29a5bfe26..f63778cb2363 100644
--- a/arch/x86/mm/mmap.c
+++ b/arch/x86/mm/mmap.c
@@ -98,7 +98,7 @@ static unsigned long mmap_base(void)
  * Bottom-up (legacy) layout on X86_32 did not support randomization, X86_64
  * does, but not when emulating X86_32
  */
-static unsigned long mmap_legacy_base(void)
+unsigned long mmap_legacy_base(void)
 {
 	if (mmap_is_ia32())
 		return TASK_UNMAPPED_BASE;
diff --git a/include/linux/sched.h b/include/linux/sched.h
index d722490da030..923dd6ea4a0e 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -314,6 +314,7 @@ struct nsproxy;
 struct user_namespace;
 
 #ifdef CONFIG_MMU
+extern unsigned long mmap_legacy_base(void);
 extern void arch_pick_mmap_layout(struct mm_struct *mm);
 extern unsigned long
 arch_get_unmapped_area(struct file *, unsigned long, unsigned long,
-- 
cgit v1.2.3


From 0a324f3189ed9c78b1aaf48d88e93cb18643c655 Mon Sep 17 00:00:00 2001
From: Moshe Lazer <moshel@mellanox.com>
Date: Wed, 14 Aug 2013 17:46:48 +0300
Subject: net/mlx5_core: Support MANAGE_PAGES and QUERY_PAGES firmware command
 changes

In the previous QUERY_PAGES command version we used one command to get the
required amount of boot, init and post init pages.  The new version uses the
op_mod field to specify whether the query is for the required amount of boot,
init or post init pages. In addition the output field size for the required
amount of pages increased from 16 to 32 bits.

In MANAGE_PAGES command the input_num_entries and output_num_entries fields
sizes changed from 16 to 32 bits and the PAS tables offset changed to 0x10.

In the pages request event the num_pages field also changed to 32 bits.

In the HCA-capabilities-layout the size and location of max_qp_mcg field has
been changed to support 24 bits.

This patch isn't compatible with firmware versions < 5; however, it  turns out that the
first GA firmware we will publish will not support previous versions so this should be OK.

Signed-off-by: Moshe Lazer <moshel@mellanox.com>
Signed-off-by: Eli Cohen <eli@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx5/core/cmd.c      |  2 +-
 drivers/net/ethernet/mellanox/mlx5/core/eq.c       |  2 +-
 drivers/net/ethernet/mellanox/mlx5/core/fw.c       |  2 +-
 .../net/ethernet/mellanox/mlx5/core/pagealloc.c    | 58 ++++++++++------------
 include/linux/mlx5/device.h                        | 22 ++++----
 include/linux/mlx5/driver.h                        |  4 +-
 6 files changed, 41 insertions(+), 49 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
index c571de85d0f9..5472cbd34028 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
@@ -46,7 +46,7 @@
 #include "mlx5_core.h"
 
 enum {
-	CMD_IF_REV = 4,
+	CMD_IF_REV = 5,
 };
 
 enum {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
index c02cbcfd0fb8..443cc4d7b024 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
@@ -268,7 +268,7 @@ static int mlx5_eq_int(struct mlx5_core_dev *dev, struct mlx5_eq *eq)
 		case MLX5_EVENT_TYPE_PAGE_REQUEST:
 			{
 				u16 func_id = be16_to_cpu(eqe->data.req_pages.func_id);
-				s16 npages = be16_to_cpu(eqe->data.req_pages.num_pages);
+				s32 npages = be32_to_cpu(eqe->data.req_pages.num_pages);
 
 				mlx5_core_dbg(dev, "page request for func 0x%x, napges %d\n", func_id, npages);
 				mlx5_core_req_pages_handler(dev, func_id, npages);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw.c b/drivers/net/ethernet/mellanox/mlx5/core/fw.c
index 72a5222447f5..f012658b6a92 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fw.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fw.c
@@ -113,7 +113,7 @@ int mlx5_cmd_query_hca_cap(struct mlx5_core_dev *dev,
 	caps->log_max_srq = out->hca_cap.log_max_srqs & 0x1f;
 	caps->local_ca_ack_delay = out->hca_cap.local_ca_ack_delay & 0x1f;
 	caps->log_max_mcg = out->hca_cap.log_max_mcg;
-	caps->max_qp_mcg = be16_to_cpu(out->hca_cap.max_qp_mcg);
+	caps->max_qp_mcg = be32_to_cpu(out->hca_cap.max_qp_mcg) & 0xffffff;
 	caps->max_ra_res_qp = 1 << (out->hca_cap.log_max_ra_res_qp & 0x3f);
 	caps->max_ra_req_qp = 1 << (out->hca_cap.log_max_ra_req_qp & 0x3f);
 	caps->max_srq_wqes = 1 << out->hca_cap.log_max_srq_sz;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
index 4a3e137931a3..3a2408d44820 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
@@ -43,10 +43,16 @@ enum {
 	MLX5_PAGES_TAKE		= 2
 };
 
+enum {
+	MLX5_BOOT_PAGES		= 1,
+	MLX5_INIT_PAGES		= 2,
+	MLX5_POST_INIT_PAGES	= 3
+};
+
 struct mlx5_pages_req {
 	struct mlx5_core_dev *dev;
 	u32	func_id;
-	s16	npages;
+	s32	npages;
 	struct work_struct work;
 };
 
@@ -64,27 +70,23 @@ struct mlx5_query_pages_inbox {
 
 struct mlx5_query_pages_outbox {
 	struct mlx5_outbox_hdr	hdr;
-	__be16			num_boot_pages;
+	__be16			rsvd;
 	__be16			func_id;
-	__be16			init_pages;
-	__be16			num_pages;
+	__be32			num_pages;
 };
 
 struct mlx5_manage_pages_inbox {
 	struct mlx5_inbox_hdr	hdr;
-	__be16			rsvd0;
+	__be16			rsvd;
 	__be16			func_id;
-	__be16			rsvd1;
-	__be16			num_entries;
-	u8			rsvd2[16];
+	__be32			num_entries;
 	__be64			pas[0];
 };
 
 struct mlx5_manage_pages_outbox {
 	struct mlx5_outbox_hdr	hdr;
-	u8			rsvd0[2];
-	__be16			num_entries;
-	u8			rsvd1[20];
+	__be32			num_entries;
+	u8			rsvd[4];
 	__be64			pas[0];
 };
 
@@ -146,7 +148,7 @@ static struct page *remove_page(struct mlx5_core_dev *dev, u64 addr)
 }
 
 static int mlx5_cmd_query_pages(struct mlx5_core_dev *dev, u16 *func_id,
-				s16 *pages, s16 *init_pages, u16 *boot_pages)
+				s32 *npages, int boot)
 {
 	struct mlx5_query_pages_inbox	in;
 	struct mlx5_query_pages_outbox	out;
@@ -155,6 +157,8 @@ static int mlx5_cmd_query_pages(struct mlx5_core_dev *dev, u16 *func_id,
 	memset(&in, 0, sizeof(in));
 	memset(&out, 0, sizeof(out));
 	in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_QUERY_PAGES);
+	in.hdr.opmod = boot ? cpu_to_be16(MLX5_BOOT_PAGES) : cpu_to_be16(MLX5_INIT_PAGES);
+
 	err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out));
 	if (err)
 		return err;
@@ -162,15 +166,7 @@ static int mlx5_cmd_query_pages(struct mlx5_core_dev *dev, u16 *func_id,
 	if (out.hdr.status)
 		return mlx5_cmd_status_to_err(&out.hdr);
 
-	if (pages)
-		*pages = be16_to_cpu(out.num_pages);
-
-	if (init_pages)
-		*init_pages = be16_to_cpu(out.init_pages);
-
-	if (boot_pages)
-		*boot_pages = be16_to_cpu(out.num_boot_pages);
-
+	*npages = be32_to_cpu(out.num_pages);
 	*func_id = be16_to_cpu(out.func_id);
 
 	return err;
@@ -224,7 +220,7 @@ static int give_pages(struct mlx5_core_dev *dev, u16 func_id, int npages,
 	in->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_MANAGE_PAGES);
 	in->hdr.opmod = cpu_to_be16(MLX5_PAGES_GIVE);
 	in->func_id = cpu_to_be16(func_id);
-	in->num_entries = cpu_to_be16(npages);
+	in->num_entries = cpu_to_be32(npages);
 	err = mlx5_cmd_exec(dev, in, inlen, &out, sizeof(out));
 	mlx5_core_dbg(dev, "err %d\n", err);
 	if (err) {
@@ -292,7 +288,7 @@ static int reclaim_pages(struct mlx5_core_dev *dev, u32 func_id, int npages,
 	in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_MANAGE_PAGES);
 	in.hdr.opmod = cpu_to_be16(MLX5_PAGES_TAKE);
 	in.func_id = cpu_to_be16(func_id);
-	in.num_entries = cpu_to_be16(npages);
+	in.num_entries = cpu_to_be32(npages);
 	mlx5_core_dbg(dev, "npages %d, outlen %d\n", npages, outlen);
 	err = mlx5_cmd_exec(dev, &in, sizeof(in), out, outlen);
 	if (err) {
@@ -306,7 +302,7 @@ static int reclaim_pages(struct mlx5_core_dev *dev, u32 func_id, int npages,
 		goto out_free;
 	}
 
-	num_claimed = be16_to_cpu(out->num_entries);
+	num_claimed = be32_to_cpu(out->num_entries);
 	if (nclaimed)
 		*nclaimed = num_claimed;
 
@@ -345,7 +341,7 @@ static void pages_work_handler(struct work_struct *work)
 }
 
 void mlx5_core_req_pages_handler(struct mlx5_core_dev *dev, u16 func_id,
-				 s16 npages)
+				 s32 npages)
 {
 	struct mlx5_pages_req *req;
 
@@ -364,20 +360,18 @@ void mlx5_core_req_pages_handler(struct mlx5_core_dev *dev, u16 func_id,
 
 int mlx5_satisfy_startup_pages(struct mlx5_core_dev *dev, int boot)
 {
-	u16 uninitialized_var(boot_pages);
-	s16 uninitialized_var(init_pages);
 	u16 uninitialized_var(func_id);
+	s32 uninitialized_var(npages);
 	int err;
 
-	err = mlx5_cmd_query_pages(dev, &func_id, NULL, &init_pages,
-				   &boot_pages);
+	err = mlx5_cmd_query_pages(dev, &func_id, &npages, boot);
 	if (err)
 		return err;
 
+	mlx5_core_dbg(dev, "requested %d %s pages for func_id 0x%x\n",
+		      npages, boot ? "boot" : "init", func_id);
 
-	mlx5_core_dbg(dev, "requested %d init pages and %d boot pages for func_id 0x%x\n",
-		      init_pages, boot_pages, func_id);
-	return give_pages(dev, func_id, boot ? boot_pages : init_pages, 0);
+	return give_pages(dev, func_id, npages, 0);
 }
 
 static int optimal_reclaimed_pages(void)
diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
index 737685e9e852..68029b30c3dc 100644
--- a/include/linux/mlx5/device.h
+++ b/include/linux/mlx5/device.h
@@ -309,21 +309,20 @@ struct mlx5_hca_cap {
 	__be16	max_desc_sz_rq;
 	u8	rsvd21[2];
 	__be16	max_desc_sz_sq_dc;
-	u8	rsvd22[4];
-	__be16	max_qp_mcg;
-	u8	rsvd23;
+	__be32	max_qp_mcg;
+	u8	rsvd22[3];
 	u8	log_max_mcg;
-	u8	rsvd24;
+	u8	rsvd23;
 	u8	log_max_pd;
-	u8	rsvd25;
+	u8	rsvd24;
 	u8	log_max_xrcd;
-	u8	rsvd26[42];
+	u8	rsvd25[42];
 	__be16  log_uar_page_sz;
-	u8	rsvd27[28];
+	u8	rsvd26[28];
 	u8	log_msx_atomic_size_qp;
-	u8	rsvd28[2];
+	u8	rsvd27[2];
 	u8	log_msx_atomic_size_dc;
-	u8	rsvd29[76];
+	u8	rsvd28[76];
 };
 
 
@@ -472,9 +471,8 @@ struct mlx5_eqe_cmd {
 struct mlx5_eqe_page_req {
 	u8		rsvd0[2];
 	__be16		func_id;
-	u8		rsvd1[2];
-	__be16		num_pages;
-	__be32		rsvd2[5];
+	__be32		num_pages;
+	__be32		rsvd1[5];
 };
 
 union ev_data {
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 611e65e76b00..8888381fc150 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -358,7 +358,7 @@ struct mlx5_caps {
 	u32	reserved_lkey;
 	u8	local_ca_ack_delay;
 	u8	log_max_mcg;
-	u16	max_qp_mcg;
+	u32	max_qp_mcg;
 	int	min_page_sz;
 };
 
@@ -691,7 +691,7 @@ void mlx5_pagealloc_cleanup(struct mlx5_core_dev *dev);
 int mlx5_pagealloc_start(struct mlx5_core_dev *dev);
 void mlx5_pagealloc_stop(struct mlx5_core_dev *dev);
 void mlx5_core_req_pages_handler(struct mlx5_core_dev *dev, u16 func_id,
-				 s16 npages);
+				 s32 npages);
 int mlx5_satisfy_startup_pages(struct mlx5_core_dev *dev, int boot);
 int mlx5_reclaim_startup_pages(struct mlx5_core_dev *dev);
 void mlx5_register_debugfs(void);
-- 
cgit v1.2.3