From c196f6ee61e42df9acec797ab41a0c8d87e8aa2e Mon Sep 17 00:00:00 2001 From: Manjunath Hadli Date: Thu, 18 Oct 2012 07:54:59 -0300 Subject: [media] media: add new mediabus format enums for dm365 add new enum entries for supporting the media-bus formats on dm365. These include some bayer and some non-bayer formats. V4L2_MBUS_FMT_YDYUYDYV8_1X16 and V4L2_MBUS_FMT_UV8_1X8 are used internal to the hardware by the resizer. V4L2_MBUS_FMT_SBGGR10_ALAW8_1X8 represents the bayer ALAW format that is supported by dm365 hardware. Signed-off-by: Manjunath Hadli Signed-off-by: Lad, Prabhakar Acked-by: Sakari Ailus Acked-by: Laurent Pinchart Acked-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/v4l2-mediabus.h | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/v4l2-mediabus.h b/include/uapi/linux/v4l2-mediabus.h index 7d64e0e1a18b..e860f55820ec 100644 --- a/include/uapi/linux/v4l2-mediabus.h +++ b/include/uapi/linux/v4l2-mediabus.h @@ -47,8 +47,9 @@ enum v4l2_mbus_pixelcode { V4L2_MBUS_FMT_RGB565_2X8_BE = 0x1007, V4L2_MBUS_FMT_RGB565_2X8_LE = 0x1008, - /* YUV (including grey) - next is 0x2014 */ + /* YUV (including grey) - next is 0x2016 */ V4L2_MBUS_FMT_Y8_1X8 = 0x2001, + V4L2_MBUS_FMT_UV8_1X8 = 0x2015, V4L2_MBUS_FMT_UYVY8_1_5X8 = 0x2002, V4L2_MBUS_FMT_VYUY8_1_5X8 = 0x2003, V4L2_MBUS_FMT_YUYV8_1_5X8 = 0x2004, @@ -65,14 +66,19 @@ enum v4l2_mbus_pixelcode { V4L2_MBUS_FMT_VYUY8_1X16 = 0x2010, V4L2_MBUS_FMT_YUYV8_1X16 = 0x2011, V4L2_MBUS_FMT_YVYU8_1X16 = 0x2012, + V4L2_MBUS_FMT_YDYUYDYV8_1X16 = 0x2014, V4L2_MBUS_FMT_YUYV10_1X20 = 0x200d, V4L2_MBUS_FMT_YVYU10_1X20 = 0x200e, - /* Bayer - next is 0x3015 */ + /* Bayer - next is 0x3019 */ V4L2_MBUS_FMT_SBGGR8_1X8 = 0x3001, V4L2_MBUS_FMT_SGBRG8_1X8 = 0x3013, V4L2_MBUS_FMT_SGRBG8_1X8 = 0x3002, V4L2_MBUS_FMT_SRGGB8_1X8 = 0x3014, + V4L2_MBUS_FMT_SBGGR10_ALAW8_1X8 = 0x3015, + V4L2_MBUS_FMT_SGBRG10_ALAW8_1X8 = 0x3016, + V4L2_MBUS_FMT_SGRBG10_ALAW8_1X8 = 0x3017, + V4L2_MBUS_FMT_SRGGB10_ALAW8_1X8 = 0x3018, V4L2_MBUS_FMT_SBGGR10_DPCM8_1X8 = 0x300b, V4L2_MBUS_FMT_SGBRG10_DPCM8_1X8 = 0x300c, V4L2_MBUS_FMT_SGRBG10_DPCM8_1X8 = 0x3009, -- cgit v1.2.3 From 05ad6fc1d54f106d5b8c598e2f9b59b12f3fb476 Mon Sep 17 00:00:00 2001 From: Manjunath Hadli Date: Thu, 18 Oct 2012 07:58:02 -0300 Subject: [media] v4l2: add new pixel formats supported on dm365 add new macro V4L2_PIX_FMT_SGRBG10ALAW8 and associated formats to represent Bayer format frames compressed by A-LAW algorithm, add V4L2_PIX_FMT_UV8 to represent storage of CbCr data (UV interleaved) only. Signed-off-by: Manjunath Hadli Signed-off-by: Lad, Prabhakar Acked-by: Sakari Ailus Acked-by: Laurent Pinchart Signed-off-by: Mauro Carvalho Chehab --- .../DocBook/media/v4l/pixfmt-srggb10alaw8.xml | 34 ++++++++++++ Documentation/DocBook/media/v4l/pixfmt-uv8.xml | 62 ++++++++++++++++++++++ Documentation/DocBook/media/v4l/pixfmt.xml | 2 + include/uapi/linux/videodev2.h | 8 +++ 4 files changed, 106 insertions(+) create mode 100644 Documentation/DocBook/media/v4l/pixfmt-srggb10alaw8.xml create mode 100644 Documentation/DocBook/media/v4l/pixfmt-uv8.xml (limited to 'include/uapi/linux') diff --git a/Documentation/DocBook/media/v4l/pixfmt-srggb10alaw8.xml b/Documentation/DocBook/media/v4l/pixfmt-srggb10alaw8.xml new file mode 100644 index 000000000000..c934192e98ce --- /dev/null +++ b/Documentation/DocBook/media/v4l/pixfmt-srggb10alaw8.xml @@ -0,0 +1,34 @@ + + + + V4L2_PIX_FMT_SBGGR10ALAW8 ('aBA8'), + V4L2_PIX_FMT_SGBRG10ALAW8 ('aGA8'), + V4L2_PIX_FMT_SGRBG10ALAW8 ('agA8'), + V4L2_PIX_FMT_SRGGB10ALAW8 ('aRA8'), + + &manvol; + + + + V4L2_PIX_FMT_SBGGR10ALAW8 + + + V4L2_PIX_FMT_SGBRG10ALAW8 + + + V4L2_PIX_FMT_SGRBG10ALAW8 + + + V4L2_PIX_FMT_SRGGB10ALAW8 + + 10-bit Bayer formats compressed to 8 bits + + + Description + The following four pixel formats are raw sRGB / Bayer + formats with 10 bits per color compressed to 8 bits each, + using the A-LAW algorithm. Each color component consumes 8 + bits of memory. In other respects this format is similar to + . + + diff --git a/Documentation/DocBook/media/v4l/pixfmt-uv8.xml b/Documentation/DocBook/media/v4l/pixfmt-uv8.xml new file mode 100644 index 000000000000..c507c1f73cd0 --- /dev/null +++ b/Documentation/DocBook/media/v4l/pixfmt-uv8.xml @@ -0,0 +1,62 @@ + + + V4L2_PIX_FMT_UV8 ('UV8') + &manvol; + + + V4L2_PIX_FMT_UV8 + UV plane interleaved + + + Description + In this format there is no Y plane, Only CbCr plane. ie + (UV interleaved) + + + <constant>V4L2_PIX_FMT_UV8</constant> + pixel image + + + + Byte Order. + Each cell is one byte. + + + + + + start + 0: + Cb00 + Cr00 + Cb01 + Cr01 + + + start + 4: + Cb10 + Cr10 + Cb11 + Cr11 + + + start + 8: + Cb20 + Cr20 + Cb21 + Cr21 + + + start + 12: + Cb30 + Cr30 + Cb31 + Cr31 + + + + + + + + + diff --git a/Documentation/DocBook/media/v4l/pixfmt.xml b/Documentation/DocBook/media/v4l/pixfmt.xml index bf94f417592c..99b8d2ad6e4f 100644 --- a/Documentation/DocBook/media/v4l/pixfmt.xml +++ b/Documentation/DocBook/media/v4l/pixfmt.xml @@ -673,6 +673,7 @@ access the palette, this must be done with ioctls of the Linux framebuffer API.< &sub-srggb8; &sub-sbggr16; &sub-srggb10; + &sub-srggb10alaw8; &sub-srggb10dpcm8; &sub-srggb12; @@ -701,6 +702,7 @@ information. &sub-y12; &sub-y10b; &sub-y16; + &sub-uv8; &sub-yuyv; &sub-uyvy; &sub-yvyu; diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h index 3cf3e946e331..39d2cecdf38c 100644 --- a/include/uapi/linux/videodev2.h +++ b/include/uapi/linux/videodev2.h @@ -334,6 +334,9 @@ struct v4l2_pix_format { /* Palette formats */ #define V4L2_PIX_FMT_PAL8 v4l2_fourcc('P', 'A', 'L', '8') /* 8 8-bit palette */ +/* Chrominance formats */ +#define V4L2_PIX_FMT_UV8 v4l2_fourcc('U', 'V', '8', ' ') /* 8 UV 4:4 */ + /* Luminance+Chrominance formats */ #define V4L2_PIX_FMT_YVU410 v4l2_fourcc('Y', 'V', 'U', '9') /* 9 YVU 4:1:0 */ #define V4L2_PIX_FMT_YVU420 v4l2_fourcc('Y', 'V', '1', '2') /* 12 YVU 4:2:0 */ @@ -386,6 +389,11 @@ struct v4l2_pix_format { #define V4L2_PIX_FMT_SGBRG12 v4l2_fourcc('G', 'B', '1', '2') /* 12 GBGB.. RGRG.. */ #define V4L2_PIX_FMT_SGRBG12 v4l2_fourcc('B', 'A', '1', '2') /* 12 GRGR.. BGBG.. */ #define V4L2_PIX_FMT_SRGGB12 v4l2_fourcc('R', 'G', '1', '2') /* 12 RGRG.. GBGB.. */ + /* 10bit raw bayer a-law compressed to 8 bits */ +#define V4L2_PIX_FMT_SBGGR10ALAW8 v4l2_fourcc('a', 'B', 'A', '8') +#define V4L2_PIX_FMT_SGBRG10ALAW8 v4l2_fourcc('a', 'G', 'A', '8') +#define V4L2_PIX_FMT_SGRBG10ALAW8 v4l2_fourcc('a', 'g', 'A', '8') +#define V4L2_PIX_FMT_SRGGB10ALAW8 v4l2_fourcc('a', 'R', 'A', '8') /* 10bit raw bayer DPCM compressed to 8 bits */ #define V4L2_PIX_FMT_SBGGR10DPCM8 v4l2_fourcc('b', 'B', 'A', '8') #define V4L2_PIX_FMT_SGBRG10DPCM8 v4l2_fourcc('b', 'G', 'A', '8') -- cgit v1.2.3 From 1202ecdc24fc88d5b144824f55ec9c8899591caf Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Sun, 21 Oct 2012 16:02:47 -0300 Subject: [media] v4l: Define video buffer flags for timestamp types Define video buffer flags for different timestamp types. Everything up to now have used either realtime clock or monotonic clock, without a way to tell which clock the timestamp was taken from. Also document that the clock source of the timestamp in the timestamp field depends on buffer flags. [mchehab@redhat.com: fix a few wrong references to Kernel 3.8 - as this patch is meant for 3.9] Signed-off-by: Sakari Ailus Acked-by: Laurent Pinchart Acked-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- Documentation/DocBook/media/v4l/compat.xml | 12 +++++++ Documentation/DocBook/media/v4l/io.xml | 53 +++++++++++++++++++++++------- Documentation/DocBook/media/v4l/v4l2.xml | 12 ++++++- include/uapi/linux/videodev2.h | 4 +++ 4 files changed, 69 insertions(+), 12 deletions(-) (limited to 'include/uapi/linux') diff --git a/Documentation/DocBook/media/v4l/compat.xml b/Documentation/DocBook/media/v4l/compat.xml index 3dd9e78815d1..ebd2bfd1ee8e 100644 --- a/Documentation/DocBook/media/v4l/compat.xml +++ b/Documentation/DocBook/media/v4l/compat.xml @@ -2477,6 +2477,18 @@ that used it. It was originally scheduled for removal in 2.6.35. +
+ V4L2 in Linux 3.9 + + + Added timestamp types to + flags field in + v4l2_buffer. See . + + +
+
Relation of V4L2 to other Linux multimedia APIs diff --git a/Documentation/DocBook/media/v4l/io.xml b/Documentation/DocBook/media/v4l/io.xml index 388a34032653..09e8dcf5e9c4 100644 --- a/Documentation/DocBook/media/v4l/io.xml +++ b/Documentation/DocBook/media/v4l/io.xml @@ -741,17 +741,19 @@ applications when an output stream. struct timeval timestamp - For input streams this is the -system time (as returned by the gettimeofday() -function) when the first data byte was captured. For output streams -the data will not be displayed before this time, secondary to the -nominal frame rate determined by the current video standard in -enqueued order. Applications can for example zero this field to -display frames as soon as possible. The driver stores the time at -which the first data byte was actually sent out in the -timestamp field. This permits -applications to monitor the drift between the video and system -clock. + For input streams this is time when the first data + byte was captured, as returned by the + clock_gettime() function for the relevant + clock id; see V4L2_BUF_FLAG_TIMESTAMP_* in + . For output streams the data + will not be displayed before this time, secondary to the nominal + frame rate determined by the current video standard in enqueued + order. Applications can for example zero this field to display + frames as soon as possible. The driver stores the time at which + the first data byte was actually sent out in the + timestamp field. This permits + applications to monitor the drift between the video and system + clock. &v4l2-timecode; @@ -1114,6 +1116,35 @@ Typically applications shall use this flag for output buffers if the data in this buffer has not been created by the CPU but by some DMA-capable unit, in which case caches have not been used. + + V4L2_BUF_FLAG_TIMESTAMP_MASK + 0xe000 + Mask for timestamp types below. To test the + timestamp type, mask out bits not belonging to timestamp + type by performing a logical and operation with buffer + flags and timestamp mask. + + + V4L2_BUF_FLAG_TIMESTAMP_UNKNOWN + 0x0000 + Unknown timestamp type. This type is used by + drivers before Linux 3.9 and may be either monotonic (see + below) or realtime (wall clock). Monotonic clock has been + favoured in embedded systems whereas most of the drivers + use the realtime clock. Either kinds of timestamps are + available in user space via + clock_gettime(2) using clock IDs + CLOCK_MONOTONIC and + CLOCK_REALTIME, respectively. + + + V4L2_BUF_FLAG_TIMESTAMP_MONOTONIC + 0x2000 + The buffer timestamp has been taken from the + CLOCK_MONOTONIC clock. To access the + same clock outside V4L2, use + clock_gettime(2) . + diff --git a/Documentation/DocBook/media/v4l/v4l2.xml b/Documentation/DocBook/media/v4l/v4l2.xml index 4d110b1ad3e9..8fe29427c8e4 100644 --- a/Documentation/DocBook/media/v4l/v4l2.xml +++ b/Documentation/DocBook/media/v4l/v4l2.xml @@ -139,6 +139,16 @@ structs, ioctls) must be noted in more detail in the history chapter (compat.xml), along with the possible impact on existing drivers and applications. --> + + 3.9 + 2012-12-03 + sa + Added timestamp types to + v4l2_buffer, see . + + + 3.6 2012-07-02 @@ -472,7 +482,7 @@ and discussions on the V4L mailing list. Video for Linux Two API Specification - Revision 3.6 + Revision 3.9 &sub-common; diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h index 39d2cecdf38c..94cbe26e9f00 100644 --- a/include/uapi/linux/videodev2.h +++ b/include/uapi/linux/videodev2.h @@ -701,6 +701,10 @@ struct v4l2_buffer { /* Cache handling flags */ #define V4L2_BUF_FLAG_NO_CACHE_INVALIDATE 0x0800 #define V4L2_BUF_FLAG_NO_CACHE_CLEAN 0x1000 +/* Timestamp type */ +#define V4L2_BUF_FLAG_TIMESTAMP_MASK 0xe000 +#define V4L2_BUF_FLAG_TIMESTAMP_UNKNOWN 0x0000 +#define V4L2_BUF_FLAG_TIMESTAMP_MONOTONIC 0x2000 /** * struct v4l2_exportbuffer - export of video buffer as DMABUF file descriptor -- cgit v1.2.3 From b01189b85b7653a51ebe851fc4d70702cebfed3c Mon Sep 17 00:00:00 2001 From: Sylwester Nawrocki Date: Wed, 28 Nov 2012 14:40:32 -0300 Subject: [media] V4L: DocBook: Add V4L2_MBUS_FMT_YUV10_1X30 media bus pixel code This patch adds definition of media bus code for YUV pixel format transferred in 30-bit samples where each component has 10 bits width. [mchehab@redhat.com: fix a merge conflict at v4l2-mediabus.h] Signed-off-by: Sylwester Nawrocki Signed-off-by: Kyungmin Park Signed-off-by: Mauro Carvalho Chehab --- Documentation/DocBook/media/v4l/subdev-formats.xml | 716 ++++++--------------- Documentation/DocBook/media_api.tmpl | 1 + include/uapi/linux/v4l2-mediabus.h | 3 +- 3 files changed, 195 insertions(+), 525 deletions(-) (limited to 'include/uapi/linux') diff --git a/Documentation/DocBook/media/v4l/subdev-formats.xml b/Documentation/DocBook/media/v4l/subdev-formats.xml index 6f341d1eca1a..cc51372ed5e0 100644 --- a/Documentation/DocBook/media/v4l/subdev-formats.xml +++ b/Documentation/DocBook/media/v4l/subdev-formats.xml @@ -973,27 +973,37 @@ - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Identifier @@ -1005,6 +1015,16 @@ Bit + 29 + 28 + 27 + 26 + 25 + 24 + 23 + 22 + 21 + 10 19 18 17 @@ -1032,16 +1052,8 @@ V4L2_MBUS_FMT_Y8_1X8 0x2001 - - - - - - - - - - - - - - - - - - - - + &dash-ent-10; + &dash-ent-10; - - y7 @@ -1107,16 +1119,8 @@ V4L2_MBUS_FMT_UYVY8_1_5X8 0x2002 - - - - - - - - - - - - - - - - - - - - + &dash-ent-10; + &dash-ent-10; - - u7 @@ -1132,16 +1136,8 @@ - - - - - - - - - - - - - - - - - - - - + &dash-ent-10; + &dash-ent-10; - - y7 @@ -1157,16 +1153,8 @@ - - - - - - - - - - - - - - - - - - - - + &dash-ent-10; + &dash-ent-10; - - y7 @@ -1182,16 +1170,8 @@ - - - - - - - - - - - - - - - - - - - - + &dash-ent-10; + &dash-ent-10; - - v7 @@ -1207,16 +1187,8 @@ - - - - - - - - - - - - - - - - - - - - + &dash-ent-10; + &dash-ent-10; - - y7 @@ -1232,16 +1204,8 @@ - - - - - - - - - - - - - - - - - - - - + &dash-ent-10; + &dash-ent-10; - - y7 @@ -1257,16 +1221,8 @@ V4L2_MBUS_FMT_VYUY8_1_5X8 0x2003 - - - - - - - - - - - - - - - - - - - - + &dash-ent-10; + &dash-ent-10; - - v7 @@ -1282,16 +1238,8 @@ - - - - - - - - - - - - - - - - - - - - + &dash-ent-10; + &dash-ent-10; - - y7 @@ -1307,16 +1255,8 @@ - - - - - - - - - - - - - - - - - - - - + &dash-ent-10; + &dash-ent-10; - - y7 @@ -1332,16 +1272,8 @@ - - - - - - - - - - - - - - - - - - - - + &dash-ent-10; + &dash-ent-10; - - u7 @@ -1357,16 +1289,8 @@ - - - - - - - - - - - - - - - - - - - - + &dash-ent-10; + &dash-ent-10; - - y7 @@ -1382,16 +1306,8 @@ - - - - - - - - - - - - - - - - - - - - + &dash-ent-10; + &dash-ent-10; - - y7 @@ -1407,16 +1323,8 @@ V4L2_MBUS_FMT_YUYV8_1_5X8 0x2004 - - - - - - - - - - - - - - - - - - - - + &dash-ent-10; + &dash-ent-10; - - y7 @@ -1432,16 +1340,8 @@ - - - - - - - - - - - - - - - - - - - - + &dash-ent-10; + &dash-ent-10; - - y7 @@ -1457,16 +1357,8 @@ - - - - - - - - - - - - - - - - - - - - + &dash-ent-10; + &dash-ent-10; - - u7 @@ -1482,16 +1374,8 @@ - - - - - - - - - - - - - - - - - - - - + &dash-ent-10; + &dash-ent-10; - - y7 @@ -1507,16 +1391,8 @@ - - - - - - - - - - - - - - - - - - - - + &dash-ent-10; + &dash-ent-10; - - y7 @@ -1532,16 +1408,8 @@ - - - - - - - - - - - - - - - - - - - - + &dash-ent-10; + &dash-ent-10; - - v7 @@ -1557,16 +1425,8 @@ V4L2_MBUS_FMT_YVYU8_1_5X8 0x2005 - - - - - - - - - - - - - - - - - - - - + &dash-ent-10; + &dash-ent-10; - - y7 @@ -1582,16 +1442,8 @@ - - - - - - - - - - - - - - - - - - - - + &dash-ent-10; + &dash-ent-10; - - y7 @@ -1607,16 +1459,8 @@ - - - - - - - - - - - - - - - - - - - - + &dash-ent-10; + &dash-ent-10; - - v7 @@ -1632,16 +1476,8 @@ - - - - - - - - - - - - - - - - - - - - + &dash-ent-10; + &dash-ent-10; - - y7 @@ -1657,16 +1493,8 @@ - - - - - - - - - - - - - - - - - - - - + &dash-ent-10; + &dash-ent-10; - - y7 @@ -1682,16 +1510,8 @@ - - - - - - - - - - - - - - - - - - - - + &dash-ent-10; + &dash-ent-10; - - u7 @@ -1707,16 +1527,8 @@ V4L2_MBUS_FMT_UYVY8_2X8 0x2006 - - - - - - - - - - - - - - - - - - - - + &dash-ent-10; + &dash-ent-10; - - u7 @@ -1732,16 +1544,8 @@ - - - - - - - - - - - - - - - - - - - - + &dash-ent-10; + &dash-ent-10; - - y7 @@ -1754,19 +1558,11 @@ y0 - - - - - - - - - - - - - - - - - - - - - - - + + + + &dash-ent-10; + &dash-ent-10; - - v7 @@ -1782,16 +1578,8 @@ - - - - - - - - - - - - - - - - - - - - + &dash-ent-10; + &dash-ent-10; - - y7 @@ -1807,16 +1595,8 @@ V4L2_MBUS_FMT_VYUY8_2X8 0x2007 - - - - - - - - - - - - - - - - - - - - + &dash-ent-10; + &dash-ent-10; - - v7 @@ -1832,16 +1612,8 @@ - - - - - - - - - - - - - - - - - - - - + &dash-ent-10; + &dash-ent-10; - - y7 @@ -1857,16 +1629,8 @@ - - - - - - - - - - - - - - - - - - - - + &dash-ent-10; + &dash-ent-10; - - u7 @@ -1882,16 +1646,8 @@ - - - - - - - - - - - - - - - - - - - - + &dash-ent-10; + &dash-ent-10; - - y7 @@ -1907,16 +1663,8 @@ V4L2_MBUS_FMT_YUYV8_2X8 0x2008 - - - - - - - - - - - - - - - - - - - - + &dash-ent-10; + &dash-ent-10; - - y7 @@ -1932,16 +1680,8 @@ - - - - - - - - - - - - - - - - - - - - + &dash-ent-10; + &dash-ent-10; - - u7 @@ -1957,16 +1697,8 @@ - - - - - - - - - - - - - - - - - - - - + &dash-ent-10; + &dash-ent-10; - - y7 @@ -1982,16 +1714,8 @@ - - - - - - - - - - - - - - - - - - - - + &dash-ent-10; + &dash-ent-10; - - v7 @@ -2007,16 +1731,8 @@ V4L2_MBUS_FMT_YVYU8_2X8 0x2009 - - - - - - - - - - - - - - - - - - - - + &dash-ent-10; + &dash-ent-10; - - y7 @@ -2032,16 +1748,8 @@ - - - - - - - - - - - - - - - - - - - - + &dash-ent-10; + &dash-ent-10; - - v7 @@ -2057,16 +1765,8 @@ - - - - - - - - - - - - - - - - - - - - + &dash-ent-10; + &dash-ent-10; - - y7 @@ -2082,16 +1782,8 @@ - - - - - - - - - - - - - - - - - - - - + &dash-ent-10; + &dash-ent-10; - - u7 @@ -2107,16 +1799,8 @@ V4L2_MBUS_FMT_Y10_1X10 0x200a - - - - - - - - - - - - - - - - - - - - + &dash-ent-10; + &dash-ent-10; y9 y8 y7 @@ -2132,16 +1816,8 @@ V4L2_MBUS_FMT_YUYV10_2X10 0x200b - - - - - - - - - - - - - - - - - - - - + &dash-ent-10; + &dash-ent-10; y9 y8 y7 @@ -2157,16 +1833,8 @@ - - - - - - - - - - - - - - - - - - - - + &dash-ent-10; + &dash-ent-10; u9 u8 u7 @@ -2182,16 +1850,8 @@ - - - - - - - - - - - - - - - - - - - - + &dash-ent-10; + &dash-ent-10; y9 y8 y7 @@ -2207,16 +1867,8 @@ - - - - - - - - - - - - - - - - - - - - + &dash-ent-10; + &dash-ent-10; v9 v8 v7 @@ -2232,16 +1884,8 @@ V4L2_MBUS_FMT_YVYU10_2X10 0x200c - - - - - - - - - - - - - - - - - - - - + &dash-ent-10; + &dash-ent-10; y9 y8 y7 @@ -2257,16 +1901,8 @@ - - - - - - - - - - - - - - - - - - - - + &dash-ent-10; + &dash-ent-10; v9 v8 v7 @@ -2282,16 +1918,8 @@ - - - - - - - - - - - - - - - - - - - - + &dash-ent-10; + &dash-ent-10; y9 y8 y7 @@ -2307,16 +1935,8 @@ - - - - - - - - - - - - - - - - - - - - + &dash-ent-10; + &dash-ent-10; u9 u8 u7 @@ -2332,6 +1952,7 @@ V4L2_MBUS_FMT_Y12_1X12 0x2013 + &dash-ent-10; - - - @@ -2357,6 +1978,7 @@ V4L2_MBUS_FMT_UYVY8_1X16 0x200f + &dash-ent-10; - - - @@ -2382,6 +2004,7 @@ + &dash-ent-10; - - - @@ -2407,6 +2030,7 @@ V4L2_MBUS_FMT_VYUY8_1X16 0x2010 + &dash-ent-10; - - - @@ -2432,6 +2056,7 @@ + &dash-ent-10; - - - @@ -2457,6 +2082,7 @@ V4L2_MBUS_FMT_YUYV8_1X16 0x2011 + &dash-ent-10; - - - @@ -2482,6 +2108,7 @@ + &dash-ent-10; - - - @@ -2507,6 +2134,7 @@ V4L2_MBUS_FMT_YVYU8_1X16 0x2012 + &dash-ent-10; - - - @@ -2532,6 +2160,7 @@ + &dash-ent-10; - - - @@ -2657,6 +2286,7 @@ V4L2_MBUS_FMT_YUYV10_1X20 0x200d + &dash-ent-10; y9 y8 y7 @@ -2682,6 +2312,7 @@ + &dash-ent-10; y9 y8 y7 @@ -2707,6 +2338,7 @@ V4L2_MBUS_FMT_YVYU10_1X20 0x200e + &dash-ent-10; y9 y8 y7 @@ -2732,6 +2364,32 @@ + &dash-ent-10; + y9 + y8 + y7 + y6 + y5 + y4 + y3 + y2 + y1 + y0 + u9 + u8 + u7 + u6 + u5 + u4 + u3 + u2 + u1 + u0 + + + V4L2_MBUS_FMT_YUV10_1X30 + 0x2014 + y9 y8 y7 @@ -2752,6 +2410,16 @@ u2 u1 u0 + v9 + v8 + v7 + v6 + v5 + v4 + v3 + v2 + v1 + v0 diff --git a/Documentation/DocBook/media_api.tmpl b/Documentation/DocBook/media_api.tmpl index f2413acfe241..1f6593deb995 100644 --- a/Documentation/DocBook/media_api.tmpl +++ b/Documentation/DocBook/media_api.tmpl @@ -22,6 +22,7 @@ http://linuxtv.org/repo/"> +----------"> ]> diff --git a/include/uapi/linux/v4l2-mediabus.h b/include/uapi/linux/v4l2-mediabus.h index e860f55820ec..b9b7bea04537 100644 --- a/include/uapi/linux/v4l2-mediabus.h +++ b/include/uapi/linux/v4l2-mediabus.h @@ -47,7 +47,7 @@ enum v4l2_mbus_pixelcode { V4L2_MBUS_FMT_RGB565_2X8_BE = 0x1007, V4L2_MBUS_FMT_RGB565_2X8_LE = 0x1008, - /* YUV (including grey) - next is 0x2016 */ + /* YUV (including grey) - next is 0x2017 */ V4L2_MBUS_FMT_Y8_1X8 = 0x2001, V4L2_MBUS_FMT_UV8_1X8 = 0x2015, V4L2_MBUS_FMT_UYVY8_1_5X8 = 0x2002, @@ -69,6 +69,7 @@ enum v4l2_mbus_pixelcode { V4L2_MBUS_FMT_YDYUYDYV8_1X16 = 0x2014, V4L2_MBUS_FMT_YUYV10_1X20 = 0x200d, V4L2_MBUS_FMT_YVYU10_1X20 = 0x200e, + V4L2_MBUS_FMT_YUV10_1X30 = 0x2016, /* Bayer - next is 0x3019 */ V4L2_MBUS_FMT_SBGGR8_1X8 = 0x3001, -- cgit v1.2.3 From 9a57247f31e361f80508c40363366222dbbb6aa5 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 27 Dec 2012 23:49:39 +0000 Subject: rtnl: expose carrier value with possibility to set it Signed-off-by: Jiri Pirko Acked-by: Flavio Leitner Signed-off-by: David S. Miller --- Documentation/networking/operstates.txt | 4 ++++ include/uapi/linux/if_link.h | 1 + net/core/rtnetlink.c | 10 ++++++++++ 3 files changed, 15 insertions(+) (limited to 'include/uapi/linux') diff --git a/Documentation/networking/operstates.txt b/Documentation/networking/operstates.txt index 1a77a3cfae54..97694572338b 100644 --- a/Documentation/networking/operstates.txt +++ b/Documentation/networking/operstates.txt @@ -88,6 +88,10 @@ set this flag. On netif_carrier_off(), the scheduler stops sending packets. The name 'carrier' and the inversion are historical, think of it as lower layer. +Note that for certain kind of soft-devices, which are not managing any +real hardware, there is possible to set this bit from userpsace. +One should use TVL IFLA_CARRIER to do so. + netif_carrier_ok() can be used to query that bit. __LINK_STATE_DORMANT, maps to IFF_DORMANT: diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 60f3b6b90602..c4edfe11f1f7 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -142,6 +142,7 @@ enum { #define IFLA_PROMISCUITY IFLA_PROMISCUITY IFLA_NUM_TX_QUEUES, IFLA_NUM_RX_QUEUES, + IFLA_CARRIER, __IFLA_MAX }; diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 1868625af25e..2ef7a56ba117 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -780,6 +780,7 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev, + nla_total_size(4) /* IFLA_MTU */ + nla_total_size(4) /* IFLA_LINK */ + nla_total_size(4) /* IFLA_MASTER */ + + nla_total_size(1) /* IFLA_CARRIER */ + nla_total_size(4) /* IFLA_PROMISCUITY */ + nla_total_size(4) /* IFLA_NUM_TX_QUEUES */ + nla_total_size(4) /* IFLA_NUM_RX_QUEUES */ @@ -909,6 +910,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, nla_put_u32(skb, IFLA_LINK, dev->iflink)) || (dev->master && nla_put_u32(skb, IFLA_MASTER, dev->master->ifindex)) || + nla_put_u8(skb, IFLA_CARRIER, netif_carrier_ok(dev)) || (dev->qdisc && nla_put_string(skb, IFLA_QDISC, dev->qdisc->ops->id)) || (dev->ifalias && @@ -1108,6 +1110,7 @@ const struct nla_policy ifla_policy[IFLA_MAX+1] = { [IFLA_MTU] = { .type = NLA_U32 }, [IFLA_LINK] = { .type = NLA_U32 }, [IFLA_MASTER] = { .type = NLA_U32 }, + [IFLA_CARRIER] = { .type = NLA_U8 }, [IFLA_TXQLEN] = { .type = NLA_U32 }, [IFLA_WEIGHT] = { .type = NLA_U32 }, [IFLA_OPERSTATE] = { .type = NLA_U8 }, @@ -1438,6 +1441,13 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm, modified = 1; } + if (tb[IFLA_CARRIER]) { + err = dev_change_carrier(dev, nla_get_u8(tb[IFLA_CARRIER])); + if (err) + goto errout; + modified = 1; + } + if (tb[IFLA_TXQLEN]) dev->tx_queue_len = nla_get_u32(tb[IFLA_TXQLEN]); -- cgit v1.2.3 From fee5dfecb0c74c9eab475a2a20d7a5ababe2f8e6 Mon Sep 17 00:00:00 2001 From: David Herrmann Date: Mon, 17 Dec 2012 13:20:43 +0100 Subject: HID: uhid: use __packed__ for uhid_feature_answer_req We use __packed__ for all API structures so we can extend them without breaking alignment rules. We do try to explicitly align the structures, but to be safe we also use __packed__. uhid_feature_answer_req is already 64bit aligned so we can add __packed__ without breaking ABI. Signed-off-by: David Herrmann Signed-off-by: Jiri Kosina --- include/uapi/linux/uhid.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/uhid.h b/include/uapi/linux/uhid.h index 9c6974f16966..e9ed951e2b09 100644 --- a/include/uapi/linux/uhid.h +++ b/include/uapi/linux/uhid.h @@ -86,7 +86,7 @@ struct uhid_feature_answer_req { __u16 err; __u16 size; __u8 data[UHID_DATA_MAX]; -}; +} __attribute__((__packed__)); struct uhid_event { __u32 type; -- cgit v1.2.3 From d582cffbcd04eae0bd8a83b05648bfd54bfd21c9 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 26 Oct 2012 17:53:44 +0200 Subject: nl80211/mac80211: support full station state in AP mode Today, stations are added already associated. That is inefficient if, for example, the driver has no room for stations any more because then the station will go through the entire auth/assoc handshake, only to be kicked out afterwards. To address this a bit better, at least with drivers using the new station state callback, allow hostapd to add stations in unauthenticated mode, just after receiving the AUTH frame, before even replying. Thus if there's no more space at that point, it can send a negative auth frame back. It still needs to handle later state transition errors though, of course. Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 16 ++++++ net/mac80211/cfg.c | 115 ++++++++++++++++++++++++++----------------- net/mac80211/main.c | 3 +- net/wireless/nl80211.c | 24 +++++++++ 4 files changed, 113 insertions(+), 45 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index e3e19f8b16f2..547017100a30 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -1697,6 +1697,9 @@ enum nl80211_iftype { * flag can't be changed, it is only valid while adding a station, and * attempts to change it will silently be ignored (rather than rejected * as errors.) + * @NL80211_STA_FLAG_ASSOCIATED: station is associated; used with drivers + * that support %NL80211_FEATURE_FULL_AP_CLIENT_STATE to transition a + * previously added station into associated state * @NL80211_STA_FLAG_MAX: highest station flag number currently defined * @__NL80211_STA_FLAG_AFTER_LAST: internal use */ @@ -1708,6 +1711,7 @@ enum nl80211_sta_flags { NL80211_STA_FLAG_MFP, NL80211_STA_FLAG_AUTHENTICATED, NL80211_STA_FLAG_TDLS_PEER, + NL80211_STA_FLAG_ASSOCIATED, /* keep last */ __NL80211_STA_FLAG_AFTER_LAST, @@ -3140,6 +3144,17 @@ enum nl80211_ap_sme_features { * setting * @NL80211_FEATURE_P2P_GO_OPPPS: P2P GO implementation supports opportunistic * powersave + * @NL80211_FEATURE_FULL_AP_CLIENT_STATE: The driver supports full state + * transitions for AP clients. Without this flag (and if the driver + * doesn't have the AP SME in the device) the driver supports adding + * stations only when they're associated and adds them in associated + * state (to later be transitioned into authorized), with this flag + * they should be added before even sending the authentication reply + * and then transitioned into authenticated, associated and authorized + * states using station flags. + * Note that even for drivers that support this, the default is to add + * stations in authenticated/associated state, so to add unauthenticated + * stations the authenticated/associated bits have to be set in the mask. */ enum nl80211_feature_flags { NL80211_FEATURE_SK_TX_STATUS = 1 << 0, @@ -3155,6 +3170,7 @@ enum nl80211_feature_flags { NL80211_FEATURE_NEED_OBSS_SCAN = 1 << 10, NL80211_FEATURE_P2P_GO_CTWIN = 1 << 11, NL80211_FEATURE_P2P_GO_OPPPS = 1 << 12, + NL80211_FEATURE_FULL_AP_CLIENT_STATE = 1 << 13, }; /** diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 3e7d557fd481..f4d12c71928d 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -510,6 +510,7 @@ static void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo) BIT(NL80211_STA_FLAG_WME) | BIT(NL80211_STA_FLAG_MFP) | BIT(NL80211_STA_FLAG_AUTHENTICATED) | + BIT(NL80211_STA_FLAG_ASSOCIATED) | BIT(NL80211_STA_FLAG_TDLS_PEER); if (test_sta_flag(sta, WLAN_STA_AUTHORIZED)) sinfo->sta_flags.set |= BIT(NL80211_STA_FLAG_AUTHORIZED); @@ -521,6 +522,8 @@ static void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo) sinfo->sta_flags.set |= BIT(NL80211_STA_FLAG_MFP); if (test_sta_flag(sta, WLAN_STA_AUTH)) sinfo->sta_flags.set |= BIT(NL80211_STA_FLAG_AUTHENTICATED); + if (test_sta_flag(sta, WLAN_STA_ASSOC)) + sinfo->sta_flags.set |= BIT(NL80211_STA_FLAG_ASSOCIATED); if (test_sta_flag(sta, WLAN_STA_TDLS_PEER)) sinfo->sta_flags.set |= BIT(NL80211_STA_FLAG_TDLS_PEER); } @@ -1077,6 +1080,58 @@ static void ieee80211_send_layer2_update(struct sta_info *sta) netif_rx_ni(skb); } +static int sta_apply_auth_flags(struct ieee80211_local *local, + struct sta_info *sta, + u32 mask, u32 set) +{ + int ret; + + if (mask & BIT(NL80211_STA_FLAG_AUTHENTICATED) && + set & BIT(NL80211_STA_FLAG_AUTHENTICATED) && + !test_sta_flag(sta, WLAN_STA_AUTH)) { + ret = sta_info_move_state(sta, IEEE80211_STA_AUTH); + if (ret) + return ret; + } + + if (mask & BIT(NL80211_STA_FLAG_ASSOCIATED) && + set & BIT(NL80211_STA_FLAG_ASSOCIATED) && + !test_sta_flag(sta, WLAN_STA_ASSOC)) { + ret = sta_info_move_state(sta, IEEE80211_STA_ASSOC); + if (ret) + return ret; + } + + if (mask & BIT(NL80211_STA_FLAG_AUTHORIZED)) { + if (set & BIT(NL80211_STA_FLAG_AUTHORIZED)) + ret = sta_info_move_state(sta, IEEE80211_STA_AUTHORIZED); + else if (test_sta_flag(sta, WLAN_STA_AUTHORIZED)) + ret = sta_info_move_state(sta, IEEE80211_STA_ASSOC); + else + ret = 0; + if (ret) + return ret; + } + + if (mask & BIT(NL80211_STA_FLAG_ASSOCIATED) && + !(set & BIT(NL80211_STA_FLAG_ASSOCIATED)) && + test_sta_flag(sta, WLAN_STA_ASSOC)) { + ret = sta_info_move_state(sta, IEEE80211_STA_AUTH); + if (ret) + return ret; + } + + if (mask & BIT(NL80211_STA_FLAG_AUTHENTICATED) && + !(set & BIT(NL80211_STA_FLAG_AUTHENTICATED)) && + test_sta_flag(sta, WLAN_STA_AUTH)) { + ret = sta_info_move_state(sta, IEEE80211_STA_NONE); + if (ret) + return ret; + } + + return 0; +} + static int sta_apply_parameters(struct ieee80211_local *local, struct sta_info *sta, struct station_parameters *params) @@ -1094,52 +1149,20 @@ static int sta_apply_parameters(struct ieee80211_local *local, mask = params->sta_flags_mask; set = params->sta_flags_set; - /* - * In mesh mode, we can clear AUTHENTICATED flag but must - * also make ASSOCIATED follow appropriately for the driver - * API. See also below, after AUTHORIZED changes. - */ - if (mask & BIT(NL80211_STA_FLAG_AUTHENTICATED)) { - /* cfg80211 should not allow this in non-mesh modes */ - if (WARN_ON(!ieee80211_vif_is_mesh(&sdata->vif))) - return -EINVAL; - - if (set & BIT(NL80211_STA_FLAG_AUTHENTICATED) && - !test_sta_flag(sta, WLAN_STA_AUTH)) { - ret = sta_info_move_state(sta, IEEE80211_STA_AUTH); - if (ret) - return ret; - ret = sta_info_move_state(sta, IEEE80211_STA_ASSOC); - if (ret) - return ret; - } - } - - if (mask & BIT(NL80211_STA_FLAG_AUTHORIZED)) { - if (set & BIT(NL80211_STA_FLAG_AUTHORIZED)) - ret = sta_info_move_state(sta, IEEE80211_STA_AUTHORIZED); - else if (test_sta_flag(sta, WLAN_STA_AUTHORIZED)) - ret = sta_info_move_state(sta, IEEE80211_STA_ASSOC); - if (ret) - return ret; - } - - if (mask & BIT(NL80211_STA_FLAG_AUTHENTICATED)) { - /* cfg80211 should not allow this in non-mesh modes */ - if (WARN_ON(!ieee80211_vif_is_mesh(&sdata->vif))) - return -EINVAL; - - if (!(set & BIT(NL80211_STA_FLAG_AUTHENTICATED)) && - test_sta_flag(sta, WLAN_STA_AUTH)) { - ret = sta_info_move_state(sta, IEEE80211_STA_AUTH); - if (ret) - return ret; - ret = sta_info_move_state(sta, IEEE80211_STA_NONE); - if (ret) - return ret; - } + if (ieee80211_vif_is_mesh(&sdata->vif)) { + /* + * In mesh mode, ASSOCIATED isn't part of the nl80211 + * API but must follow AUTHENTICATED for driver state. + */ + if (mask & BIT(NL80211_STA_FLAG_AUTHENTICATED)) + mask |= BIT(NL80211_STA_FLAG_ASSOCIATED); + if (set & BIT(NL80211_STA_FLAG_AUTHENTICATED)) + set |= BIT(NL80211_STA_FLAG_ASSOCIATED); } + ret = sta_apply_auth_flags(local, sta, mask, set); + if (ret) + return ret; if (mask & BIT(NL80211_STA_FLAG_SHORT_PREAMBLE)) { if (set & BIT(NL80211_STA_FLAG_SHORT_PREAMBLE)) @@ -1273,6 +1296,10 @@ static int ieee80211_add_station(struct wiphy *wiphy, struct net_device *dev, if (!sta) return -ENOMEM; + /* + * defaults -- if userspace wants something else we'll + * change it accordingly in sta_apply_parameters() + */ sta_info_pre_move_state(sta, IEEE80211_STA_AUTH); sta_info_pre_move_state(sta, IEEE80211_STA_ASSOC); diff --git a/net/mac80211/main.c b/net/mac80211/main.c index e6514f240fce..39cfe8f10ad2 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -541,7 +541,8 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len, wiphy->features |= NL80211_FEATURE_SK_TX_STATUS | NL80211_FEATURE_SAE | NL80211_FEATURE_HT_IBSS | - NL80211_FEATURE_VIF_TXPOWER; + NL80211_FEATURE_VIF_TXPOWER | + NL80211_FEATURE_FULL_AP_CLIENT_STATE; if (!ops->hw_scan) wiphy->features |= NL80211_FEATURE_LOW_PRIORITY_SCAN | diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index b3cf7cc0d4a1..087f68ba6d7a 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -3231,11 +3231,21 @@ static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info) /* accept only the listed bits */ if (params.sta_flags_mask & ~(BIT(NL80211_STA_FLAG_AUTHORIZED) | + BIT(NL80211_STA_FLAG_AUTHENTICATED) | + BIT(NL80211_STA_FLAG_ASSOCIATED) | BIT(NL80211_STA_FLAG_SHORT_PREAMBLE) | BIT(NL80211_STA_FLAG_WME) | BIT(NL80211_STA_FLAG_MFP))) return -EINVAL; + /* but authenticated/associated only if driver handles it */ + if (!(rdev->wiphy.features & + NL80211_FEATURE_FULL_AP_CLIENT_STATE) && + params.sta_flags_mask & + (BIT(NL80211_STA_FLAG_AUTHENTICATED) | + BIT(NL80211_STA_FLAG_ASSOCIATED))) + return -EINVAL; + /* must be last in here for error handling */ params.vlan = get_vlan(info, rdev); if (IS_ERR(params.vlan)) @@ -3393,17 +3403,31 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info) /* but don't bother the driver with it */ params.sta_flags_mask &= ~BIT(NL80211_STA_FLAG_TDLS_PEER); + /* allow authenticated/associated only if driver handles it */ + if (!(rdev->wiphy.features & + NL80211_FEATURE_FULL_AP_CLIENT_STATE) && + params.sta_flags_mask & + (BIT(NL80211_STA_FLAG_AUTHENTICATED) | + BIT(NL80211_STA_FLAG_ASSOCIATED))) + return -EINVAL; + /* must be last in here for error handling */ params.vlan = get_vlan(info, rdev); if (IS_ERR(params.vlan)) return PTR_ERR(params.vlan); break; case NL80211_IFTYPE_MESH_POINT: + /* associated is disallowed */ + if (params.sta_flags_mask & BIT(NL80211_STA_FLAG_ASSOCIATED)) + return -EINVAL; /* TDLS peers cannot be added */ if (params.sta_flags_set & BIT(NL80211_STA_FLAG_TDLS_PEER)) return -EINVAL; break; case NL80211_IFTYPE_STATION: + /* associated is disallowed */ + if (params.sta_flags_mask & BIT(NL80211_STA_FLAG_ASSOCIATED)) + return -EINVAL; /* Only TDLS peers can be added */ if (!(params.sta_flags_set & BIT(NL80211_STA_FLAG_TDLS_PEER))) return -EINVAL; -- cgit v1.2.3 From 24b9f50170f55a3179c6f6d51022eb7d50502d05 Mon Sep 17 00:00:00 2001 From: Sylwester Nawrocki Date: Thu, 3 Jan 2013 12:30:30 -0300 Subject: [media] V4L: Remove deprecated image centering controls It has been over 3 years since the V4L2_CID_[HV]CENTER were deprecated. Clean up the DocBook and remove the V4L2_CID_VCENTER_DEPRECATED, V4L2_CID_VCENTER_DEPRECATED control related paragraphs. Remove the V4L2_CID_[HV]CENTER controls definitions from v4l2-controls.h, these controls are not used by any driver in the mainline now. Signed-off-by: Sylwester Nawrocki Signed-off-by: Mauro Carvalho Chehab --- Documentation/DocBook/media/v4l/controls.xml | 23 ----------------------- drivers/media/v4l2-core/v4l2-ctrls.c | 2 -- include/uapi/linux/v4l2-controls.h | 4 ---- 3 files changed, 29 deletions(-) (limited to 'include/uapi/linux') diff --git a/Documentation/DocBook/media/v4l/controls.xml b/Documentation/DocBook/media/v4l/controls.xml index 7fe5be1d3bbb..9e8f85498678 100644 --- a/Documentation/DocBook/media/v4l/controls.xml +++ b/Documentation/DocBook/media/v4l/controls.xml @@ -203,29 +203,6 @@ and should not be used in new drivers and applications. boolean Mirror the picture vertically. - - V4L2_CID_HCENTER_DEPRECATED (formerly V4L2_CID_HCENTER) - integer - Horizontal image centering. This control is -deprecated. New drivers and applications should use the Camera class controls -V4L2_CID_PAN_ABSOLUTE, -V4L2_CID_PAN_RELATIVE and -V4L2_CID_PAN_RESET instead. - - - V4L2_CID_VCENTER_DEPRECATED - (formerly V4L2_CID_VCENTER) - integer - Vertical image centering. Centering is intended to -physically adjust cameras. For image cropping see -, for clipping . This -control is deprecated. New drivers and applications should use the -Camera class controls -V4L2_CID_TILT_ABSOLUTE, -V4L2_CID_TILT_RELATIVE and -V4L2_CID_TILT_RESET instead. - V4L2_CID_POWER_LINE_FREQUENCY enum diff --git a/drivers/media/v4l2-core/v4l2-ctrls.c b/drivers/media/v4l2-core/v4l2-ctrls.c index fa02363e7db4..7b486ac3f4d9 100644 --- a/drivers/media/v4l2-core/v4l2-ctrls.c +++ b/drivers/media/v4l2-core/v4l2-ctrls.c @@ -577,8 +577,6 @@ const char *v4l2_ctrl_get_name(u32 id) case V4L2_CID_GAIN: return "Gain"; case V4L2_CID_HFLIP: return "Horizontal Flip"; case V4L2_CID_VFLIP: return "Vertical Flip"; - case V4L2_CID_HCENTER: return "Horizontal Center"; - case V4L2_CID_VCENTER: return "Vertical Center"; case V4L2_CID_POWER_LINE_FREQUENCY: return "Power Line Frequency"; case V4L2_CID_HUE_AUTO: return "Hue, Automatic"; case V4L2_CID_WHITE_BALANCE_TEMPERATURE: return "White Balance Temperature"; diff --git a/include/uapi/linux/v4l2-controls.h b/include/uapi/linux/v4l2-controls.h index f56c945cecd4..4dc0822700fe 100644 --- a/include/uapi/linux/v4l2-controls.h +++ b/include/uapi/linux/v4l2-controls.h @@ -88,10 +88,6 @@ #define V4L2_CID_HFLIP (V4L2_CID_BASE+20) #define V4L2_CID_VFLIP (V4L2_CID_BASE+21) -/* Deprecated; use V4L2_CID_PAN_RESET and V4L2_CID_TILT_RESET */ -#define V4L2_CID_HCENTER (V4L2_CID_BASE+22) -#define V4L2_CID_VCENTER (V4L2_CID_BASE+23) - #define V4L2_CID_POWER_LINE_FREQUENCY (V4L2_CID_BASE+24) enum v4l2_power_line_frequency { V4L2_CID_POWER_LINE_FREQUENCY_DISABLED = 0, -- cgit v1.2.3 From bb65a9cb953fdfe9c507e8dbb6c4ec2540484bd3 Mon Sep 17 00:00:00 2001 From: Li RongQing Date: Fri, 28 Dec 2012 16:06:28 +0800 Subject: xfrm: removes a superfluous check and add a statistic Remove the check if x->km.state equal to XFRM_STATE_VALID in xfrm_state_check_expire(), which will be done before call xfrm_state_check_expire(). add a LINUX_MIB_XFRMOUTSTATEINVALID statistic to record the outbound error due to invalid xfrm state. Signed-off-by: Li RongQing Signed-off-by: Steffen Klassert --- include/uapi/linux/snmp.h | 1 + net/xfrm/xfrm_output.c | 6 ++++++ net/xfrm/xfrm_proc.c | 1 + net/xfrm/xfrm_state.c | 3 --- 4 files changed, 8 insertions(+), 3 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/snmp.h b/include/uapi/linux/snmp.h index fdfba235f9f1..b49eab89c9fd 100644 --- a/include/uapi/linux/snmp.h +++ b/include/uapi/linux/snmp.h @@ -278,6 +278,7 @@ enum LINUX_MIB_XFRMOUTPOLDEAD, /* XfrmOutPolDead */ LINUX_MIB_XFRMOUTPOLERROR, /* XfrmOutPolError */ LINUX_MIB_XFRMFWDHDRERROR, /* XfrmFwdHdrError*/ + LINUX_MIB_XFRMOUTSTATEINVALID, /* XfrmOutStateInvalid */ __LINUX_MIB_XFRMMAX }; diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c index 95a338c89f99..3670526e70b9 100644 --- a/net/xfrm/xfrm_output.c +++ b/net/xfrm/xfrm_output.c @@ -61,6 +61,12 @@ static int xfrm_output_one(struct sk_buff *skb, int err) } spin_lock_bh(&x->lock); + + if (unlikely(x->km.state != XFRM_STATE_VALID)) { + XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTSTATEINVALID); + goto error_nolock; + } + err = xfrm_state_check_expire(x); if (err) { XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTSTATEEXPIRED); diff --git a/net/xfrm/xfrm_proc.c b/net/xfrm/xfrm_proc.c index d0a1af8ed584..603903853e89 100644 --- a/net/xfrm/xfrm_proc.c +++ b/net/xfrm/xfrm_proc.c @@ -43,6 +43,7 @@ static const struct snmp_mib xfrm_mib_list[] = { SNMP_MIB_ITEM("XfrmOutPolDead", LINUX_MIB_XFRMOUTPOLDEAD), SNMP_MIB_ITEM("XfrmOutPolError", LINUX_MIB_XFRMOUTPOLERROR), SNMP_MIB_ITEM("XfrmFwdHdrError", LINUX_MIB_XFRMFWDHDRERROR), + SNMP_MIB_ITEM("XfrmOutStateInvalid", LINUX_MIB_XFRMOUTSTATEINVALID), SNMP_MIB_SENTINEL }; diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 3459692092ec..05db2362a231 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -1370,9 +1370,6 @@ int xfrm_state_check_expire(struct xfrm_state *x) if (!x->curlft.use_time) x->curlft.use_time = get_seconds(); - if (x->km.state != XFRM_STATE_VALID) - return -EINVAL; - if (x->curlft.bytes >= x->lft.hard_byte_limit || x->curlft.packets >= x->lft.hard_packet_limit) { x->km.state = XFRM_STATE_EXPIRED; -- cgit v1.2.3 From d8346b7d9bab37e6cc712ff1622c65ff98bdfef8 Mon Sep 17 00:00:00 2001 From: Cornelia Huck Date: Thu, 20 Dec 2012 15:32:08 +0100 Subject: KVM: s390: Support for I/O interrupts. Add support for handling I/O interrupts (standard, subchannel-related ones and rudimentary adapter interrupts). The subchannel-identifying parameters are encoded into the interrupt type. I/O interrupts are floating, so they can't be injected on a specific vcpu. Reviewed-by: Alexander Graf Reviewed-by: Marcelo Tosatti Signed-off-by: Cornelia Huck Signed-off-by: Marcelo Tosatti --- Documentation/virtual/kvm/api.txt | 4 ++ arch/s390/include/asm/kvm_host.h | 2 + arch/s390/kvm/interrupt.c | 103 +++++++++++++++++++++++++++++++++++++- include/uapi/linux/kvm.h | 9 ++++ 4 files changed, 116 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index a4df5535996b..83bd92b52936 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -2069,6 +2069,10 @@ KVM_S390_INT_VIRTIO (vm) - virtio external interrupt; external interrupt KVM_S390_INT_SERVICE (vm) - sclp external interrupt; sclp parameter in parm KVM_S390_INT_EMERGENCY (vcpu) - sigp emergency; source cpu in parm KVM_S390_INT_EXTERNAL_CALL (vcpu) - sigp external call; source cpu in parm +KVM_S390_INT_IO(ai,cssid,ssid,schid) (vm) - compound value to indicate an + I/O interrupt (ai - adapter interrupt; cssid,ssid,schid - subchannel); + I/O interruption parameters in parm (subchannel) and parm64 (intparm, + interruption subclass) Note that the vcpu ioctl is asynchronous to vcpu execution. diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 711c5ab391cf..a8e35c43df78 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -74,6 +74,7 @@ struct kvm_s390_sie_block { __u64 epoch; /* 0x0038 */ __u8 reserved40[4]; /* 0x0040 */ #define LCTL_CR0 0x8000 +#define LCTL_CR6 0x0200 __u16 lctl; /* 0x0044 */ __s16 icpua; /* 0x0046 */ __u32 ictl; /* 0x0048 */ @@ -125,6 +126,7 @@ struct kvm_vcpu_stat { u32 deliver_prefix_signal; u32 deliver_restart_signal; u32 deliver_program_int; + u32 deliver_io_int; u32 exit_wait_state; u32 instruction_stidp; u32 instruction_spx; diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index c30615e605ac..52cdf20906ab 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -21,11 +21,26 @@ #include "gaccess.h" #include "trace-s390.h" +#define IOINT_SCHID_MASK 0x0000ffff +#define IOINT_SSID_MASK 0x00030000 +#define IOINT_CSSID_MASK 0x03fc0000 +#define IOINT_AI_MASK 0x04000000 + +static int is_ioint(u64 type) +{ + return ((type & 0xfffe0000u) != 0xfffe0000u); +} + static int psw_extint_disabled(struct kvm_vcpu *vcpu) { return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_EXT); } +static int psw_ioint_disabled(struct kvm_vcpu *vcpu) +{ + return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_IO); +} + static int psw_interrupts_disabled(struct kvm_vcpu *vcpu) { if ((vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PER) || @@ -67,7 +82,15 @@ static int __interrupt_is_deliverable(struct kvm_vcpu *vcpu, case KVM_S390_SIGP_SET_PREFIX: case KVM_S390_RESTART: return 1; + case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX: + if (psw_ioint_disabled(vcpu)) + return 0; + if (vcpu->arch.sie_block->gcr[6] & inti->io.io_int_word) + return 1; + return 0; default: + printk(KERN_WARNING "illegal interrupt type %llx\n", + inti->type); BUG(); } return 0; @@ -116,6 +139,12 @@ static void __set_intercept_indicator(struct kvm_vcpu *vcpu, case KVM_S390_SIGP_STOP: __set_cpuflag(vcpu, CPUSTAT_STOP_INT); break; + case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX: + if (psw_ioint_disabled(vcpu)) + __set_cpuflag(vcpu, CPUSTAT_IO_INT); + else + vcpu->arch.sie_block->lctl |= LCTL_CR6; + break; default: BUG(); } @@ -297,6 +326,47 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu, exception = 1; break; + case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX: + { + __u32 param0 = ((__u32)inti->io.subchannel_id << 16) | + inti->io.subchannel_nr; + __u64 param1 = ((__u64)inti->io.io_int_parm << 32) | + inti->io.io_int_word; + VCPU_EVENT(vcpu, 4, "interrupt: I/O %llx", inti->type); + vcpu->stat.deliver_io_int++; + trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, + param0, param1); + rc = put_guest_u16(vcpu, __LC_SUBCHANNEL_ID, + inti->io.subchannel_id); + if (rc == -EFAULT) + exception = 1; + + rc = put_guest_u16(vcpu, __LC_SUBCHANNEL_NR, + inti->io.subchannel_nr); + if (rc == -EFAULT) + exception = 1; + + rc = put_guest_u32(vcpu, __LC_IO_INT_PARM, + inti->io.io_int_parm); + if (rc == -EFAULT) + exception = 1; + + rc = put_guest_u32(vcpu, __LC_IO_INT_WORD, + inti->io.io_int_word); + if (rc == -EFAULT) + exception = 1; + + rc = copy_to_guest(vcpu, __LC_IO_OLD_PSW, + &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + if (rc == -EFAULT) + exception = 1; + + rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, + __LC_IO_NEW_PSW, sizeof(psw_t)); + if (rc == -EFAULT) + exception = 1; + break; + } default: BUG(); } @@ -545,7 +615,7 @@ int kvm_s390_inject_vm(struct kvm *kvm, { struct kvm_s390_local_interrupt *li; struct kvm_s390_float_interrupt *fi; - struct kvm_s390_interrupt_info *inti; + struct kvm_s390_interrupt_info *inti, *iter; int sigcpu; inti = kzalloc(sizeof(*inti), GFP_KERNEL); @@ -569,6 +639,22 @@ int kvm_s390_inject_vm(struct kvm *kvm, case KVM_S390_SIGP_STOP: case KVM_S390_INT_EXTERNAL_CALL: case KVM_S390_INT_EMERGENCY: + kfree(inti); + return -EINVAL; + case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX: + if (s390int->type & IOINT_AI_MASK) + VM_EVENT(kvm, 5, "%s", "inject: I/O (AI)"); + else + VM_EVENT(kvm, 5, "inject: I/O css %x ss %x schid %04x", + s390int->type & IOINT_CSSID_MASK, + s390int->type & IOINT_SSID_MASK, + s390int->type & IOINT_SCHID_MASK); + inti->type = s390int->type; + inti->io.subchannel_id = s390int->parm >> 16; + inti->io.subchannel_nr = s390int->parm & 0x0000ffffu; + inti->io.io_int_parm = s390int->parm64 >> 32; + inti->io.io_int_word = s390int->parm64 & 0x00000000ffffffffull; + break; default: kfree(inti); return -EINVAL; @@ -579,7 +665,19 @@ int kvm_s390_inject_vm(struct kvm *kvm, mutex_lock(&kvm->lock); fi = &kvm->arch.float_int; spin_lock(&fi->lock); - list_add_tail(&inti->list, &fi->list); + if (!is_ioint(inti->type)) + list_add_tail(&inti->list, &fi->list); + else { + /* Keep I/O interrupts sorted in isc order. */ + list_for_each_entry(iter, &fi->list, list) { + if (!is_ioint(iter->type)) + continue; + if (iter->io.io_int_word <= inti->io.io_int_word) + continue; + break; + } + list_add_tail(&inti->list, &iter->list); + } atomic_set(&fi->active, 1); sigcpu = find_first_bit(fi->idle_mask, KVM_MAX_VCPUS); if (sigcpu == KVM_MAX_VCPUS) { @@ -653,6 +751,7 @@ int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu, break; case KVM_S390_INT_VIRTIO: case KVM_S390_INT_SERVICE: + case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX: default: kfree(inti); return -EINVAL; diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index e6e5d4b13708..54540bdd3340 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -401,6 +401,15 @@ struct kvm_s390_psw { #define KVM_S390_INT_SERVICE 0xffff2401u #define KVM_S390_INT_EMERGENCY 0xffff1201u #define KVM_S390_INT_EXTERNAL_CALL 0xffff1202u +/* Anything below 0xfffe0000u is taken by INT_IO */ +#define KVM_S390_INT_IO(ai,cssid,ssid,schid) \ + (((schid)) | \ + ((ssid) << 16) | \ + ((cssid) << 18) | \ + ((ai) << 26)) +#define KVM_S390_INT_IO_MIN 0x00000000u +#define KVM_S390_INT_IO_MAX 0xfffdffffu + struct kvm_s390_interrupt { __u32 type; -- cgit v1.2.3 From 48a3e950f4cee6a345ffbe9baf599f1e9a54c479 Mon Sep 17 00:00:00 2001 From: Cornelia Huck Date: Thu, 20 Dec 2012 15:32:09 +0100 Subject: KVM: s390: Add support for machine checks. Add support for injecting machine checks (only repressible conditions for now). This is a bit more involved than I/O interrupts, for these reasons: - Machine checks come in both floating and cpu varieties. - We don't have a bit for machine checks enabling, but have to use a roundabout approach with trapping PSW changing instructions and watching for opened machine checks. Reviewed-by: Alexander Graf Reviewed-by: Marcelo Tosatti Signed-off-by: Cornelia Huck Signed-off-by: Marcelo Tosatti --- Documentation/virtual/kvm/api.txt | 4 ++ arch/s390/include/asm/kvm_host.h | 8 +++ arch/s390/kvm/intercept.c | 2 + arch/s390/kvm/interrupt.c | 112 +++++++++++++++++++++++++++++++ arch/s390/kvm/kvm-s390.h | 3 + arch/s390/kvm/priv.c | 135 ++++++++++++++++++++++++++++++++++++++ arch/s390/kvm/trace-s390.h | 6 +- include/uapi/linux/kvm.h | 1 + 8 files changed, 268 insertions(+), 3 deletions(-) (limited to 'include/uapi/linux') diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 83bd92b52936..8a0de309932f 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -2073,6 +2073,10 @@ KVM_S390_INT_IO(ai,cssid,ssid,schid) (vm) - compound value to indicate an I/O interrupt (ai - adapter interrupt; cssid,ssid,schid - subchannel); I/O interruption parameters in parm (subchannel) and parm64 (intparm, interruption subclass) +KVM_S390_MCHK (vm, vcpu) - machine check interrupt; cr 14 bits in parm, + machine check interrupt code in parm64 (note that + machine checks needing further payload are not + supported by this ioctl) Note that the vcpu ioctl is asynchronous to vcpu execution. diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index a8e35c43df78..29363d155cd5 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -75,8 +75,10 @@ struct kvm_s390_sie_block { __u8 reserved40[4]; /* 0x0040 */ #define LCTL_CR0 0x8000 #define LCTL_CR6 0x0200 +#define LCTL_CR14 0x0002 __u16 lctl; /* 0x0044 */ __s16 icpua; /* 0x0046 */ +#define ICTL_LPSW 0x00400000 __u32 ictl; /* 0x0048 */ __u32 eca; /* 0x004c */ __u8 icptcode; /* 0x0050 */ @@ -187,6 +189,11 @@ struct kvm_s390_emerg_info { __u16 code; }; +struct kvm_s390_mchk_info { + __u64 cr14; + __u64 mcic; +}; + struct kvm_s390_interrupt_info { struct list_head list; u64 type; @@ -197,6 +204,7 @@ struct kvm_s390_interrupt_info { struct kvm_s390_emerg_info emerg; struct kvm_s390_extcall_info extcall; struct kvm_s390_prefix_info prefix; + struct kvm_s390_mchk_info mchk; }; }; diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c index df6c0ad085aa..950c13ecaf60 100644 --- a/arch/s390/kvm/intercept.c +++ b/arch/s390/kvm/intercept.c @@ -97,10 +97,12 @@ static int handle_lctl(struct kvm_vcpu *vcpu) static const intercept_handler_t instruction_handlers[256] = { [0x01] = kvm_s390_handle_01, + [0x82] = kvm_s390_handle_lpsw, [0x83] = kvm_s390_handle_diag, [0xae] = kvm_s390_handle_sigp, [0xb2] = kvm_s390_handle_b2, [0xb7] = handle_lctl, + [0xb9] = kvm_s390_handle_b9, [0xe5] = kvm_s390_handle_e5, [0xeb] = handle_lctlg, }; diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 52cdf20906ab..b3b4748485ee 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -41,6 +41,11 @@ static int psw_ioint_disabled(struct kvm_vcpu *vcpu) return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_IO); } +static int psw_mchk_disabled(struct kvm_vcpu *vcpu) +{ + return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_MCHECK); +} + static int psw_interrupts_disabled(struct kvm_vcpu *vcpu) { if ((vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PER) || @@ -82,6 +87,12 @@ static int __interrupt_is_deliverable(struct kvm_vcpu *vcpu, case KVM_S390_SIGP_SET_PREFIX: case KVM_S390_RESTART: return 1; + case KVM_S390_MCHK: + if (psw_mchk_disabled(vcpu)) + return 0; + if (vcpu->arch.sie_block->gcr[14] & inti->mchk.cr14) + return 1; + return 0; case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX: if (psw_ioint_disabled(vcpu)) return 0; @@ -116,6 +127,7 @@ static void __reset_intercept_indicators(struct kvm_vcpu *vcpu) CPUSTAT_IO_INT | CPUSTAT_EXT_INT | CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags); vcpu->arch.sie_block->lctl = 0x0000; + vcpu->arch.sie_block->ictl &= ~ICTL_LPSW; } static void __set_cpuflag(struct kvm_vcpu *vcpu, u32 flag) @@ -139,6 +151,12 @@ static void __set_intercept_indicator(struct kvm_vcpu *vcpu, case KVM_S390_SIGP_STOP: __set_cpuflag(vcpu, CPUSTAT_STOP_INT); break; + case KVM_S390_MCHK: + if (psw_mchk_disabled(vcpu)) + vcpu->arch.sie_block->ictl |= ICTL_LPSW; + else + vcpu->arch.sie_block->lctl |= LCTL_CR14; + break; case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX: if (psw_ioint_disabled(vcpu)) __set_cpuflag(vcpu, CPUSTAT_IO_INT); @@ -326,6 +344,32 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu, exception = 1; break; + case KVM_S390_MCHK: + VCPU_EVENT(vcpu, 4, "interrupt: machine check mcic=%llx", + inti->mchk.mcic); + trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, + inti->mchk.cr14, + inti->mchk.mcic); + rc = kvm_s390_vcpu_store_status(vcpu, + KVM_S390_STORE_STATUS_PREFIXED); + if (rc == -EFAULT) + exception = 1; + + rc = put_guest_u64(vcpu, __LC_MCCK_CODE, inti->mchk.mcic); + if (rc == -EFAULT) + exception = 1; + + rc = copy_to_guest(vcpu, __LC_MCK_OLD_PSW, + &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + if (rc == -EFAULT) + exception = 1; + + rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, + __LC_MCK_NEW_PSW, sizeof(psw_t)); + if (rc == -EFAULT) + exception = 1; + break; + case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX: { __u32 param0 = ((__u32)inti->io.subchannel_id << 16) | @@ -588,6 +632,61 @@ void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu) } } +void kvm_s390_deliver_pending_machine_checks(struct kvm_vcpu *vcpu) +{ + struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; + struct kvm_s390_float_interrupt *fi = vcpu->arch.local_int.float_int; + struct kvm_s390_interrupt_info *n, *inti = NULL; + int deliver; + + __reset_intercept_indicators(vcpu); + if (atomic_read(&li->active)) { + do { + deliver = 0; + spin_lock_bh(&li->lock); + list_for_each_entry_safe(inti, n, &li->list, list) { + if ((inti->type == KVM_S390_MCHK) && + __interrupt_is_deliverable(vcpu, inti)) { + list_del(&inti->list); + deliver = 1; + break; + } + __set_intercept_indicator(vcpu, inti); + } + if (list_empty(&li->list)) + atomic_set(&li->active, 0); + spin_unlock_bh(&li->lock); + if (deliver) { + __do_deliver_interrupt(vcpu, inti); + kfree(inti); + } + } while (deliver); + } + + if (atomic_read(&fi->active)) { + do { + deliver = 0; + spin_lock(&fi->lock); + list_for_each_entry_safe(inti, n, &fi->list, list) { + if ((inti->type == KVM_S390_MCHK) && + __interrupt_is_deliverable(vcpu, inti)) { + list_del(&inti->list); + deliver = 1; + break; + } + __set_intercept_indicator(vcpu, inti); + } + if (list_empty(&fi->list)) + atomic_set(&fi->active, 0); + spin_unlock(&fi->lock); + if (deliver) { + __do_deliver_interrupt(vcpu, inti); + kfree(inti); + } + } while (deliver); + } +} + int kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code) { struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; @@ -641,6 +740,13 @@ int kvm_s390_inject_vm(struct kvm *kvm, case KVM_S390_INT_EMERGENCY: kfree(inti); return -EINVAL; + case KVM_S390_MCHK: + VM_EVENT(kvm, 5, "inject: machine check parm64:%llx", + s390int->parm64); + inti->type = s390int->type; + inti->mchk.cr14 = s390int->parm; /* upper bits are not used */ + inti->mchk.mcic = s390int->parm64; + break; case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX: if (s390int->type & IOINT_AI_MASK) VM_EVENT(kvm, 5, "%s", "inject: I/O (AI)"); @@ -749,6 +855,12 @@ int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu, inti->type = s390int->type; inti->emerg.code = s390int->parm; break; + case KVM_S390_MCHK: + VCPU_EVENT(vcpu, 5, "inject: machine check parm64:%llx", + s390int->parm64); + inti->type = s390int->type; + inti->mchk.mcic = s390int->parm64; + break; case KVM_S390_INT_VIRTIO: case KVM_S390_INT_SERVICE: case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX: diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index dccc0242b7ca..1f7cc6ccf102 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h @@ -106,6 +106,7 @@ int kvm_s390_handle_wait(struct kvm_vcpu *vcpu); enum hrtimer_restart kvm_s390_idle_wakeup(struct hrtimer *timer); void kvm_s390_tasklet(unsigned long parm); void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu); +void kvm_s390_deliver_pending_machine_checks(struct kvm_vcpu *vcpu); int kvm_s390_inject_vm(struct kvm *kvm, struct kvm_s390_interrupt *s390int); int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu, @@ -117,6 +118,8 @@ int kvm_s390_inject_sigp_stop(struct kvm_vcpu *vcpu, int action); int kvm_s390_handle_b2(struct kvm_vcpu *vcpu); int kvm_s390_handle_e5(struct kvm_vcpu *vcpu); int kvm_s390_handle_01(struct kvm_vcpu *vcpu); +int kvm_s390_handle_b9(struct kvm_vcpu *vcpu); +int kvm_s390_handle_lpsw(struct kvm_vcpu *vcpu); /* implemented in sigp.c */ int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu); diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index d715842f56ca..d3cbcd3c9ada 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -18,6 +18,8 @@ #include #include #include +#include +#include #include "gaccess.h" #include "kvm-s390.h" #include "trace.h" @@ -166,6 +168,99 @@ static int handle_stfl(struct kvm_vcpu *vcpu) return 0; } +static void handle_new_psw(struct kvm_vcpu *vcpu) +{ + /* Check whether the new psw is enabled for machine checks. */ + if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_MCHECK) + kvm_s390_deliver_pending_machine_checks(vcpu); +} + +#define PSW_MASK_ADDR_MODE (PSW_MASK_EA | PSW_MASK_BA) +#define PSW_MASK_UNASSIGNED 0xb80800fe7fffffffUL +#define PSW_ADDR_24 0x00000000000fffffUL +#define PSW_ADDR_31 0x000000007fffffffUL + +int kvm_s390_handle_lpsw(struct kvm_vcpu *vcpu) +{ + u64 addr; + psw_compat_t new_psw; + + if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) + return kvm_s390_inject_program_int(vcpu, + PGM_PRIVILEGED_OPERATION); + + addr = kvm_s390_get_base_disp_s(vcpu); + + if (addr & 7) { + kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); + goto out; + } + + if (copy_from_guest(vcpu, &new_psw, addr, sizeof(new_psw))) { + kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); + goto out; + } + + if (!(new_psw.mask & PSW32_MASK_BASE)) { + kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); + goto out; + } + + vcpu->arch.sie_block->gpsw.mask = + (new_psw.mask & ~PSW32_MASK_BASE) << 32; + vcpu->arch.sie_block->gpsw.addr = new_psw.addr; + + if ((vcpu->arch.sie_block->gpsw.mask & PSW_MASK_UNASSIGNED) || + (!(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_ADDR_MODE) && + (vcpu->arch.sie_block->gpsw.addr & ~PSW_ADDR_24)) || + ((vcpu->arch.sie_block->gpsw.mask & PSW_MASK_ADDR_MODE) == + PSW_MASK_EA)) { + kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); + goto out; + } + + handle_new_psw(vcpu); +out: + return 0; +} + +static int handle_lpswe(struct kvm_vcpu *vcpu) +{ + u64 addr; + psw_t new_psw; + + addr = kvm_s390_get_base_disp_s(vcpu); + + if (addr & 7) { + kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); + goto out; + } + + if (copy_from_guest(vcpu, &new_psw, addr, sizeof(new_psw))) { + kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); + goto out; + } + + vcpu->arch.sie_block->gpsw.mask = new_psw.mask; + vcpu->arch.sie_block->gpsw.addr = new_psw.addr; + + if ((vcpu->arch.sie_block->gpsw.mask & PSW_MASK_UNASSIGNED) || + (((vcpu->arch.sie_block->gpsw.mask & PSW_MASK_ADDR_MODE) == + PSW_MASK_BA) && + (vcpu->arch.sie_block->gpsw.addr & ~PSW_ADDR_31)) || + (!(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_ADDR_MODE) && + (vcpu->arch.sie_block->gpsw.addr & ~PSW_ADDR_24)) || + ((vcpu->arch.sie_block->gpsw.mask & PSW_MASK_ADDR_MODE) == + PSW_MASK_EA)) { + kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); + goto out; + } + + handle_new_psw(vcpu); +out: + return 0; +} + static int handle_stidp(struct kvm_vcpu *vcpu) { u64 operand2; @@ -292,6 +387,7 @@ static const intercept_handler_t priv_handlers[256] = { [0x5f] = handle_chsc, [0x7d] = handle_stsi, [0xb1] = handle_stfl, + [0xb2] = handle_lpswe, }; int kvm_s390_handle_b2(struct kvm_vcpu *vcpu) @@ -316,6 +412,45 @@ int kvm_s390_handle_b2(struct kvm_vcpu *vcpu) return -EOPNOTSUPP; } +static int handle_epsw(struct kvm_vcpu *vcpu) +{ + int reg1, reg2; + + reg1 = (vcpu->arch.sie_block->ipb & 0x00f00000) >> 24; + reg2 = (vcpu->arch.sie_block->ipb & 0x000f0000) >> 16; + + /* This basically extracts the mask half of the psw. */ + vcpu->run->s.regs.gprs[reg1] &= 0xffffffff00000000; + vcpu->run->s.regs.gprs[reg1] |= vcpu->arch.sie_block->gpsw.mask >> 32; + if (reg2) { + vcpu->run->s.regs.gprs[reg2] &= 0xffffffff00000000; + vcpu->run->s.regs.gprs[reg2] |= + vcpu->arch.sie_block->gpsw.mask & 0x00000000ffffffff; + } + return 0; +} + +static const intercept_handler_t b9_handlers[256] = { + [0x8d] = handle_epsw, +}; + +int kvm_s390_handle_b9(struct kvm_vcpu *vcpu) +{ + intercept_handler_t handler; + + /* This is handled just as for the B2 instructions. */ + handler = b9_handlers[vcpu->arch.sie_block->ipa & 0x00ff]; + if (handler) { + if ((handler != handle_epsw) && + (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)) + return kvm_s390_inject_program_int(vcpu, + PGM_PRIVILEGED_OPERATION); + else + return handler(vcpu); + } + return -EOPNOTSUPP; +} + static int handle_tprot(struct kvm_vcpu *vcpu) { u64 address1, address2; diff --git a/arch/s390/kvm/trace-s390.h b/arch/s390/kvm/trace-s390.h index 90fdf85b5ff7..95fbc1ab88dc 100644 --- a/arch/s390/kvm/trace-s390.h +++ b/arch/s390/kvm/trace-s390.h @@ -141,13 +141,13 @@ TRACE_EVENT(kvm_s390_inject_vcpu, * Trace point for the actual delivery of interrupts. */ TRACE_EVENT(kvm_s390_deliver_interrupt, - TP_PROTO(unsigned int id, __u64 type, __u32 data0, __u64 data1), + TP_PROTO(unsigned int id, __u64 type, __u64 data0, __u64 data1), TP_ARGS(id, type, data0, data1), TP_STRUCT__entry( __field(int, id) __field(__u32, inttype) - __field(__u32, data0) + __field(__u64, data0) __field(__u64, data1) ), @@ -159,7 +159,7 @@ TRACE_EVENT(kvm_s390_deliver_interrupt, ), TP_printk("deliver interrupt (vcpu %d): type:%x (%s) " \ - "data:%08x %016llx", + "data:%08llx %016llx", __entry->id, __entry->inttype, __print_symbolic(__entry->inttype, kvm_s390_int_type), __entry->data0, __entry->data1) diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 54540bdd3340..80bb3b801116 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -397,6 +397,7 @@ struct kvm_s390_psw { #define KVM_S390_PROGRAM_INT 0xfffe0001u #define KVM_S390_SIGP_SET_PREFIX 0xfffe0002u #define KVM_S390_RESTART 0xfffe0003u +#define KVM_S390_MCHK 0xfffe1000u #define KVM_S390_INT_VIRTIO 0xffff2603u #define KVM_S390_INT_SERVICE 0xffff2401u #define KVM_S390_INT_EMERGENCY 0xffff1201u -- cgit v1.2.3 From fa6b7fe9928d50444c29b29c8563746c6b0c6299 Mon Sep 17 00:00:00 2001 From: Cornelia Huck Date: Thu, 20 Dec 2012 15:32:12 +0100 Subject: KVM: s390: Add support for channel I/O instructions. Add a new capability, KVM_CAP_S390_CSS_SUPPORT, which will pass intercepts for channel I/O instructions to userspace. Only I/O instructions interacting with I/O interrupts need to be handled in-kernel: - TEST PENDING INTERRUPTION (tpi) dequeues and stores pending interrupts entirely in-kernel. - TEST SUBCHANNEL (tsch) dequeues pending interrupts in-kernel and exits via KVM_EXIT_S390_TSCH to userspace for subchannel- related processing. Reviewed-by: Marcelo Tosatti Reviewed-by: Alexander Graf Signed-off-by: Cornelia Huck Signed-off-by: Marcelo Tosatti --- Documentation/virtual/kvm/api.txt | 30 +++++++++++++ arch/s390/include/asm/kvm_host.h | 1 + arch/s390/kvm/intercept.c | 1 + arch/s390/kvm/interrupt.c | 37 ++++++++++++++++ arch/s390/kvm/kvm-s390.c | 12 ++++++ arch/s390/kvm/kvm-s390.h | 2 + arch/s390/kvm/priv.c | 91 +++++++++++++++++++++++++++++++++++++-- arch/s390/kvm/trace-s390.h | 20 +++++++++ include/trace/events/kvm.h | 2 +- include/uapi/linux/kvm.h | 11 +++++ 10 files changed, 202 insertions(+), 5 deletions(-) (limited to 'include/uapi/linux') diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 73bd159c5559..f2d6391178b9 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -2350,6 +2350,22 @@ The possible hypercalls are defined in the Power Architecture Platform Requirements (PAPR) document available from www.power.org (free developer registration required to access it). + /* KVM_EXIT_S390_TSCH */ + struct { + __u16 subchannel_id; + __u16 subchannel_nr; + __u32 io_int_parm; + __u32 io_int_word; + __u32 ipb; + __u8 dequeued; + } s390_tsch; + +s390 specific. This exit occurs when KVM_CAP_S390_CSS_SUPPORT has been enabled +and TEST SUBCHANNEL was intercepted. If dequeued is set, a pending I/O +interrupt for the target subchannel has been dequeued and subchannel_id, +subchannel_nr, io_int_parm and io_int_word contain the parameters for that +interrupt. ipb is needed for instruction parameter decoding. + /* Fix the size of the union. */ char padding[256]; }; @@ -2471,3 +2487,17 @@ For mmu types KVM_MMU_FSL_BOOKE_NOHV and KVM_MMU_FSL_BOOKE_HV: where "num_sets" is the tlb_sizes[] value divided by the tlb_ways[] value. - The tsize field of mas1 shall be set to 4K on TLB0, even though the hardware ignores this value for TLB0. + +6.4 KVM_CAP_S390_CSS_SUPPORT + +Architectures: s390 +Parameters: none +Returns: 0 on success; -1 on error + +This capability enables support for handling of channel I/O instructions. + +TEST PENDING INTERRUPTION and the interrupt portion of TEST SUBCHANNEL are +handled in-kernel, while the other I/O instructions are passed to userspace. + +When this capability is enabled, KVM_EXIT_S390_TSCH will occur on TEST +SUBCHANNEL intercepts. diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 29363d155cd5..16bd5d169cdb 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -262,6 +262,7 @@ struct kvm_arch{ debug_info_t *dbf; struct kvm_s390_float_interrupt float_int; struct gmap *gmap; + int css_support; }; extern int sie64a(struct kvm_s390_sie_block *, u64 *); diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c index 71af87dbb42c..f26ff1e31bdb 100644 --- a/arch/s390/kvm/intercept.c +++ b/arch/s390/kvm/intercept.c @@ -264,6 +264,7 @@ static const intercept_handler_t intercept_funcs[] = { [0x0C >> 2] = handle_instruction_and_prog, [0x10 >> 2] = handle_noop, [0x14 >> 2] = handle_noop, + [0x18 >> 2] = handle_noop, [0x1C >> 2] = kvm_s390_handle_wait, [0x20 >> 2] = handle_validity, [0x28 >> 2] = handle_stop, diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index b3b4748485ee..9a128357fd15 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -709,6 +709,43 @@ int kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code) return 0; } +struct kvm_s390_interrupt_info *kvm_s390_get_io_int(struct kvm *kvm, + u64 cr6, u64 schid) +{ + struct kvm_s390_float_interrupt *fi; + struct kvm_s390_interrupt_info *inti, *iter; + + if ((!schid && !cr6) || (schid && cr6)) + return NULL; + mutex_lock(&kvm->lock); + fi = &kvm->arch.float_int; + spin_lock(&fi->lock); + inti = NULL; + list_for_each_entry(iter, &fi->list, list) { + if (!is_ioint(iter->type)) + continue; + if (cr6 && ((cr6 & iter->io.io_int_word) == 0)) + continue; + if (schid) { + if (((schid & 0x00000000ffff0000) >> 16) != + iter->io.subchannel_id) + continue; + if ((schid & 0x000000000000ffff) != + iter->io.subchannel_nr) + continue; + } + inti = iter; + break; + } + if (inti) + list_del_init(&inti->list); + if (list_empty(&fi->list)) + atomic_set(&fi->active, 0); + spin_unlock(&fi->lock); + mutex_unlock(&kvm->lock); + return inti; +} + int kvm_s390_inject_vm(struct kvm *kvm, struct kvm_s390_interrupt *s390int) { diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 5ff26033825c..5b01f0953900 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -141,6 +141,7 @@ int kvm_dev_ioctl_check_extension(long ext) case KVM_CAP_SYNC_REGS: case KVM_CAP_ONE_REG: case KVM_CAP_ENABLE_CAP: + case KVM_CAP_S390_CSS_SUPPORT: r = 1; break; case KVM_CAP_NR_VCPUS: @@ -235,6 +236,9 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) if (!kvm->arch.gmap) goto out_nogmap; } + + kvm->arch.css_support = 0; + return 0; out_nogmap: debug_unregister(kvm->arch.dbf); @@ -658,6 +662,7 @@ rerun_vcpu: case KVM_EXIT_INTR: case KVM_EXIT_S390_RESET: case KVM_EXIT_S390_UCONTROL: + case KVM_EXIT_S390_TSCH: break; default: BUG(); @@ -818,6 +823,13 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu, return -EINVAL; switch (cap->cap) { + case KVM_CAP_S390_CSS_SUPPORT: + if (!vcpu->kvm->arch.css_support) { + vcpu->kvm->arch.css_support = 1; + trace_kvm_s390_enable_css(vcpu->kvm); + } + r = 0; + break; default: r = -EINVAL; break; diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index 211b340385a7..3e05deff21b6 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h @@ -113,6 +113,8 @@ int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu, struct kvm_s390_interrupt *s390int); int kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code); int kvm_s390_inject_sigp_stop(struct kvm_vcpu *vcpu, int action); +struct kvm_s390_interrupt_info *kvm_s390_get_io_int(struct kvm *kvm, + u64 cr6, u64 schid); /* implemented in priv.c */ int kvm_s390_handle_b2(struct kvm_vcpu *vcpu); diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index 8ad776f87856..0ef9894606e5 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -127,15 +127,98 @@ static int handle_skey(struct kvm_vcpu *vcpu) return 0; } -static int handle_io_inst(struct kvm_vcpu *vcpu) +static int handle_tpi(struct kvm_vcpu *vcpu) { - VCPU_EVENT(vcpu, 4, "%s", "I/O instruction"); - /* condition code 3 */ + u64 addr; + struct kvm_s390_interrupt_info *inti; + int cc; + + addr = kvm_s390_get_base_disp_s(vcpu); + + inti = kvm_s390_get_io_int(vcpu->kvm, vcpu->run->s.regs.crs[6], 0); + if (inti) { + if (addr) { + /* + * Store the two-word I/O interruption code into the + * provided area. + */ + put_guest_u16(vcpu, addr, inti->io.subchannel_id); + put_guest_u16(vcpu, addr + 2, inti->io.subchannel_nr); + put_guest_u32(vcpu, addr + 4, inti->io.io_int_parm); + } else { + /* + * Store the three-word I/O interruption code into + * the appropriate lowcore area. + */ + put_guest_u16(vcpu, 184, inti->io.subchannel_id); + put_guest_u16(vcpu, 186, inti->io.subchannel_nr); + put_guest_u32(vcpu, 188, inti->io.io_int_parm); + put_guest_u32(vcpu, 192, inti->io.io_int_word); + } + cc = 1; + } else + cc = 0; + kfree(inti); + /* Set condition code and we're done. */ vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44); - vcpu->arch.sie_block->gpsw.mask |= (3 & 3ul) << 44; + vcpu->arch.sie_block->gpsw.mask |= (cc & 3ul) << 44; return 0; } +static int handle_tsch(struct kvm_vcpu *vcpu) +{ + struct kvm_s390_interrupt_info *inti; + + inti = kvm_s390_get_io_int(vcpu->kvm, 0, + vcpu->run->s.regs.gprs[1]); + + /* + * Prepare exit to userspace. + * We indicate whether we dequeued a pending I/O interrupt + * so that userspace can re-inject it if the instruction gets + * a program check. While this may re-order the pending I/O + * interrupts, this is no problem since the priority is kept + * intact. + */ + vcpu->run->exit_reason = KVM_EXIT_S390_TSCH; + vcpu->run->s390_tsch.dequeued = !!inti; + if (inti) { + vcpu->run->s390_tsch.subchannel_id = inti->io.subchannel_id; + vcpu->run->s390_tsch.subchannel_nr = inti->io.subchannel_nr; + vcpu->run->s390_tsch.io_int_parm = inti->io.io_int_parm; + vcpu->run->s390_tsch.io_int_word = inti->io.io_int_word; + } + vcpu->run->s390_tsch.ipb = vcpu->arch.sie_block->ipb; + kfree(inti); + return -EREMOTE; +} + +static int handle_io_inst(struct kvm_vcpu *vcpu) +{ + VCPU_EVENT(vcpu, 4, "%s", "I/O instruction"); + + if (vcpu->kvm->arch.css_support) { + /* + * Most I/O instructions will be handled by userspace. + * Exceptions are tpi and the interrupt portion of tsch. + */ + if (vcpu->arch.sie_block->ipa == 0xb236) + return handle_tpi(vcpu); + if (vcpu->arch.sie_block->ipa == 0xb235) + return handle_tsch(vcpu); + /* Handle in userspace. */ + return -EOPNOTSUPP; + } else { + /* + * Set condition code 3 to stop the guest from issueing channel + * I/O instructions. + */ + vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44); + vcpu->arch.sie_block->gpsw.mask |= (3 & 3ul) << 44; + return 0; + } +} + static int handle_stfl(struct kvm_vcpu *vcpu) { unsigned int facility_list; diff --git a/arch/s390/kvm/trace-s390.h b/arch/s390/kvm/trace-s390.h index 95fbc1ab88dc..13f30f58a2df 100644 --- a/arch/s390/kvm/trace-s390.h +++ b/arch/s390/kvm/trace-s390.h @@ -204,6 +204,26 @@ TRACE_EVENT(kvm_s390_stop_request, ); +/* + * Trace point for enabling channel I/O instruction support. + */ +TRACE_EVENT(kvm_s390_enable_css, + TP_PROTO(void *kvm), + TP_ARGS(kvm), + + TP_STRUCT__entry( + __field(void *, kvm) + ), + + TP_fast_assign( + __entry->kvm = kvm; + ), + + TP_printk("enabling channel I/O support (kvm @ %p)\n", + __entry->kvm) + ); + + #endif /* _TRACE_KVMS390_H */ /* This part must be outside protection */ diff --git a/include/trace/events/kvm.h b/include/trace/events/kvm.h index 7ef9e759f499..a23f47c884cf 100644 --- a/include/trace/events/kvm.h +++ b/include/trace/events/kvm.h @@ -14,7 +14,7 @@ ERSN(SHUTDOWN), ERSN(FAIL_ENTRY), ERSN(INTR), ERSN(SET_TPR), \ ERSN(TPR_ACCESS), ERSN(S390_SIEIC), ERSN(S390_RESET), ERSN(DCR),\ ERSN(NMI), ERSN(INTERNAL_ERROR), ERSN(OSI), ERSN(PAPR_HCALL), \ - ERSN(S390_UCONTROL) + ERSN(S390_UCONTROL), ERSN(S390_TSCH) TRACE_EVENT(kvm_userspace_exit, TP_PROTO(__u32 reason, int errno), diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 80bb3b801116..8bb0bf83afc5 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -168,6 +168,7 @@ struct kvm_pit_config { #define KVM_EXIT_PAPR_HCALL 19 #define KVM_EXIT_S390_UCONTROL 20 #define KVM_EXIT_WATCHDOG 21 +#define KVM_EXIT_S390_TSCH 22 /* For KVM_EXIT_INTERNAL_ERROR */ /* Emulate instruction failed. */ @@ -285,6 +286,15 @@ struct kvm_run { __u64 ret; __u64 args[9]; } papr_hcall; + /* KVM_EXIT_S390_TSCH */ + struct { + __u16 subchannel_id; + __u16 subchannel_nr; + __u32 io_int_parm; + __u32 io_int_word; + __u32 ipb; + __u8 dequeued; + } s390_tsch; /* Fix the size of the union. */ char padding[256]; }; @@ -645,6 +655,7 @@ struct kvm_ppc_smmu_info { #define KVM_CAP_IRQFD_RESAMPLE 82 #define KVM_CAP_PPC_BOOKE_WATCHDOG 83 #define KVM_CAP_PPC_HTAB_FD 84 +#define KVM_CAP_S390_CSS_SUPPORT 85 #ifdef KVM_CAP_IRQ_ROUTING -- cgit v1.2.3 From 390a1bd8538132186ddb679cafe9e75b7ef7e2d2 Mon Sep 17 00:00:00 2001 From: Samuel Ortiz Date: Wed, 19 Dec 2012 19:11:32 +0100 Subject: NFC: Initial Secure Element API Each NFC adapter can have several links to different secure elements and that property needs to be exported by the drivers. A secure element link can be enabled and disabled, and card emulation will be handled by the currently active one. Otherwise card emulation will be host implemented. Signed-off-by: Samuel Ortiz --- drivers/nfc/nfcwilink.c | 1 + drivers/nfc/pn533.c | 1 + drivers/nfc/pn544/pn544.c | 6 ++++-- include/net/nfc/hci.h | 3 +++ include/net/nfc/nci_core.h | 1 + include/net/nfc/nfc.h | 6 ++++++ include/uapi/linux/nfc.h | 14 ++++++++++++++ net/nfc/core.c | 3 +++ net/nfc/hci/core.c | 3 ++- net/nfc/nci/core.c | 2 ++ net/nfc/netlink.c | 1 + 11 files changed, 38 insertions(+), 3 deletions(-) (limited to 'include/uapi/linux') diff --git a/drivers/nfc/nfcwilink.c b/drivers/nfc/nfcwilink.c index c7c182d2b7df..3b731acbc408 100644 --- a/drivers/nfc/nfcwilink.c +++ b/drivers/nfc/nfcwilink.c @@ -542,6 +542,7 @@ static int nfcwilink_probe(struct platform_device *pdev) drv->ndev = nci_allocate_device(&nfcwilink_ops, protocols, + NFC_SE_NONE, NFCWILINK_HDR_LEN, 0); if (!drv->ndev) { diff --git a/drivers/nfc/pn533.c b/drivers/nfc/pn533.c index e8c083203b33..31a5b3b53b2a 100644 --- a/drivers/nfc/pn533.c +++ b/drivers/nfc/pn533.c @@ -2525,6 +2525,7 @@ static int pn533_probe(struct usb_interface *interface, dev->nfc_dev = nfc_allocate_device(&pn533_nfc_ops, protocols, + NFC_SE_NONE, dev->ops->tx_header_len + PN533_CMD_DATAEXCH_HEAD_LEN, dev->ops->tx_tail_len); diff --git a/drivers/nfc/pn544/pn544.c b/drivers/nfc/pn544/pn544.c index d108c794008d..9c5f16e7baef 100644 --- a/drivers/nfc/pn544/pn544.c +++ b/drivers/nfc/pn544/pn544.c @@ -801,7 +801,7 @@ int pn544_hci_probe(void *phy_id, struct nfc_phy_ops *phy_ops, char *llc_name, struct nfc_hci_dev **hdev) { struct pn544_hci_info *info; - u32 protocols; + u32 protocols, se; struct nfc_hci_init_data init_data; int r; @@ -834,8 +834,10 @@ int pn544_hci_probe(void *phy_id, struct nfc_phy_ops *phy_ops, char *llc_name, NFC_PROTO_ISO14443_B_MASK | NFC_PROTO_NFC_DEP_MASK; + se = NFC_SE_UICC | NFC_SE_EMBEDDED; + info->hdev = nfc_hci_allocate_device(&pn544_hci_ops, &init_data, 0, - protocols, llc_name, + protocols, se, llc_name, phy_headroom + PN544_CMDS_HEADROOM, phy_tailroom, phy_payload); if (!info->hdev) { diff --git a/include/net/nfc/hci.h b/include/net/nfc/hci.h index 2ff71750c428..b87a1692b086 100644 --- a/include/net/nfc/hci.h +++ b/include/net/nfc/hci.h @@ -59,6 +59,8 @@ struct nfc_hci_ops { struct nfc_target *target); int (*event_received)(struct nfc_hci_dev *hdev, u8 gate, u8 event, struct sk_buff *skb); + int (*enable_se)(struct nfc_dev *dev, u32 secure_element); + int (*disable_se)(struct nfc_dev *dev, u32 secure_element); }; /* Pipes */ @@ -150,6 +152,7 @@ struct nfc_hci_dev *nfc_hci_allocate_device(struct nfc_hci_ops *ops, struct nfc_hci_init_data *init_data, unsigned long quirks, u32 protocols, + u32 supported_se, const char *llc_name, int tx_headroom, int tx_tailroom, diff --git a/include/net/nfc/nci_core.h b/include/net/nfc/nci_core.h index d705d8674949..5bc0c460edc0 100644 --- a/include/net/nfc/nci_core.h +++ b/include/net/nfc/nci_core.h @@ -147,6 +147,7 @@ struct nci_dev { /* ----- NCI Devices ----- */ struct nci_dev *nci_allocate_device(struct nci_ops *ops, __u32 supported_protocols, + __u32 supported_se, int tx_headroom, int tx_tailroom); void nci_free_device(struct nci_dev *ndev); diff --git a/include/net/nfc/nfc.h b/include/net/nfc/nfc.h index 1665674e86b2..87a6417fc934 100644 --- a/include/net/nfc/nfc.h +++ b/include/net/nfc/nfc.h @@ -68,6 +68,8 @@ struct nfc_ops { void *cb_context); int (*tm_send)(struct nfc_dev *dev, struct sk_buff *skb); int (*check_presence)(struct nfc_dev *dev, struct nfc_target *target); + int (*enable_se)(struct nfc_dev *dev, u32 secure_element); + int (*disable_se)(struct nfc_dev *dev, u32 secure_element); }; #define NFC_TARGET_IDX_ANY -1 @@ -109,6 +111,9 @@ struct nfc_dev { struct nfc_genl_data genl_data; u32 supported_protocols; + u32 supported_se; + u32 active_se; + int tx_headroom; int tx_tailroom; @@ -125,6 +130,7 @@ extern struct class nfc_class; struct nfc_dev *nfc_allocate_device(struct nfc_ops *ops, u32 supported_protocols, + u32 supported_se, int tx_headroom, int tx_tailroom); diff --git a/include/uapi/linux/nfc.h b/include/uapi/linux/nfc.h index 0e63cee8d810..80e4ecd8c04c 100644 --- a/include/uapi/linux/nfc.h +++ b/include/uapi/linux/nfc.h @@ -67,6 +67,11 @@ * subsequent CONNECT and CC messages. * If one of the passed parameters is wrong none is set and -EINVAL is * returned. + * @NFC_CMD_ENABLE_SE: Enable the physical link to a specific secure element. + * Once enabled a secure element will handle card emulation mode, i.e. + * starting a poll from a device which has a secure element enabled means + * we want to do SE based card emulation. + * @NFC_CMD_DISABLE_SE: Disable the physical link to a specific secure element. */ enum nfc_commands { NFC_CMD_UNSPEC, @@ -86,6 +91,8 @@ enum nfc_commands { NFC_EVENT_TM_DEACTIVATED, NFC_CMD_LLC_GET_PARAMS, NFC_CMD_LLC_SET_PARAMS, + NFC_CMD_ENABLE_SE, + NFC_CMD_DISABLE_SE, /* private: internal use only */ __NFC_CMD_AFTER_LAST }; @@ -114,6 +121,7 @@ enum nfc_commands { * @NFC_ATTR_LLC_PARAM_LTO: Link TimeOut parameter * @NFC_ATTR_LLC_PARAM_RW: Receive Window size parameter * @NFC_ATTR_LLC_PARAM_MIUX: MIU eXtension parameter + * @NFC_ATTR_SE: Available Secure Elements */ enum nfc_attrs { NFC_ATTR_UNSPEC, @@ -134,6 +142,7 @@ enum nfc_attrs { NFC_ATTR_LLC_PARAM_LTO, NFC_ATTR_LLC_PARAM_RW, NFC_ATTR_LLC_PARAM_MIUX, + NFC_ATTR_SE, /* private: internal use only */ __NFC_ATTR_AFTER_LAST }; @@ -172,6 +181,11 @@ enum nfc_attrs { #define NFC_PROTO_NFC_DEP_MASK (1 << NFC_PROTO_NFC_DEP) #define NFC_PROTO_ISO14443_B_MASK (1 << NFC_PROTO_ISO14443_B) +/* NFC Secure Elements */ +#define NFC_SE_NONE 0x0 +#define NFC_SE_UICC 0x1 +#define NFC_SE_EMBEDDED 0x2 + struct sockaddr_nfc { sa_family_t sa_family; __u32 dev_idx; diff --git a/net/nfc/core.c b/net/nfc/core.c index 7d7b4ee34015..25522e56d350 100644 --- a/net/nfc/core.c +++ b/net/nfc/core.c @@ -757,6 +757,7 @@ struct nfc_dev *nfc_get_device(unsigned int idx) */ struct nfc_dev *nfc_allocate_device(struct nfc_ops *ops, u32 supported_protocols, + u32 supported_se, int tx_headroom, int tx_tailroom) { struct nfc_dev *dev; @@ -774,6 +775,8 @@ struct nfc_dev *nfc_allocate_device(struct nfc_ops *ops, dev->ops = ops; dev->supported_protocols = supported_protocols; + dev->supported_se = supported_se; + dev->active_se = NFC_SE_NONE; dev->tx_headroom = tx_headroom; dev->tx_tailroom = tx_tailroom; diff --git a/net/nfc/hci/core.c b/net/nfc/hci/core.c index 755a6b9774ab..91020b210d87 100644 --- a/net/nfc/hci/core.c +++ b/net/nfc/hci/core.c @@ -797,6 +797,7 @@ struct nfc_hci_dev *nfc_hci_allocate_device(struct nfc_hci_ops *ops, struct nfc_hci_init_data *init_data, unsigned long quirks, u32 protocols, + u32 supported_se, const char *llc_name, int tx_headroom, int tx_tailroom, @@ -822,7 +823,7 @@ struct nfc_hci_dev *nfc_hci_allocate_device(struct nfc_hci_ops *ops, return NULL; } - hdev->ndev = nfc_allocate_device(&hci_nfc_ops, protocols, + hdev->ndev = nfc_allocate_device(&hci_nfc_ops, protocols, supported_se, tx_headroom + HCI_CMDS_HEADROOM, tx_tailroom); if (!hdev->ndev) { diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c index 5f98dc1bf039..48ada0ec749e 100644 --- a/net/nfc/nci/core.c +++ b/net/nfc/nci/core.c @@ -658,6 +658,7 @@ static struct nfc_ops nci_nfc_ops = { */ struct nci_dev *nci_allocate_device(struct nci_ops *ops, __u32 supported_protocols, + __u32 supported_se, int tx_headroom, int tx_tailroom) { struct nci_dev *ndev; @@ -680,6 +681,7 @@ struct nci_dev *nci_allocate_device(struct nci_ops *ops, ndev->nfc_dev = nfc_allocate_device(&nci_nfc_ops, supported_protocols, + supported_se, tx_headroom + NCI_DATA_HDR_SIZE, tx_tailroom); if (!ndev->nfc_dev) diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c index 3568ae16786d..504b883439f1 100644 --- a/net/nfc/netlink.c +++ b/net/nfc/netlink.c @@ -366,6 +366,7 @@ static int nfc_genl_send_device(struct sk_buff *msg, struct nfc_dev *dev, if (nla_put_string(msg, NFC_ATTR_DEVICE_NAME, nfc_device_name(dev)) || nla_put_u32(msg, NFC_ATTR_DEVICE_INDEX, dev->idx) || nla_put_u32(msg, NFC_ATTR_PROTOCOLS, dev->supported_protocols) || + nla_put_u32(msg, NFC_ATTR_SE, dev->supported_se) || nla_put_u8(msg, NFC_ATTR_DEVICE_POWERED, dev->dev_up) || nla_put_u8(msg, NFC_ATTR_RF_MODE, dev->rf_mode)) goto nla_put_failure; -- cgit v1.2.3 From 1c810636556c8d53a37406b34a64d9b9b0161aa6 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Fri, 4 Jan 2013 18:12:48 +0100 Subject: KVM: PPC: BookE: Implement EPR exit The External Proxy Facility in FSL BookE chips allows the interrupt controller to automatically acknowledge an interrupt as soon as a core gets its pending external interrupt delivered. Today, user space implements the interrupt controller, so we need to check on it during such a cycle. This patch implements logic for user space to enable EPR exiting, disable EPR exiting and EPR exiting itself, so that user space can acknowledge an interrupt when an external interrupt has successfully been delivered into the guest vcpu. Signed-off-by: Alexander Graf --- Documentation/virtual/kvm/api.txt | 40 +++++++++++++++++++++++++++++++++++-- arch/powerpc/include/asm/kvm_host.h | 2 ++ arch/powerpc/include/asm/kvm_ppc.h | 9 +++++++++ arch/powerpc/kvm/booke.c | 14 ++++++++++++- arch/powerpc/kvm/powerpc.c | 10 ++++++++++ include/linux/kvm_host.h | 1 + include/uapi/linux/kvm.h | 6 ++++++ 7 files changed, 79 insertions(+), 3 deletions(-) (limited to 'include/uapi/linux') diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 4fc2bfcb16d5..a98ed09269d7 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -2246,8 +2246,8 @@ executed a memory-mapped I/O instruction which could not be satisfied by kvm. The 'data' member contains the written data if 'is_write' is true, and should be filled by application code otherwise. -NOTE: For KVM_EXIT_IO, KVM_EXIT_MMIO, KVM_EXIT_OSI, KVM_EXIT_DCR - and KVM_EXIT_PAPR the corresponding +NOTE: For KVM_EXIT_IO, KVM_EXIT_MMIO, KVM_EXIT_OSI, KVM_EXIT_DCR, + KVM_EXIT_PAPR and KVM_EXIT_EPR the corresponding operations are complete (and guest state is consistent) only after userspace has re-entered the kernel with KVM_RUN. The kernel side will first finish incomplete operations and then check for pending signals. Userspace @@ -2366,6 +2366,25 @@ interrupt for the target subchannel has been dequeued and subchannel_id, subchannel_nr, io_int_parm and io_int_word contain the parameters for that interrupt. ipb is needed for instruction parameter decoding. + /* KVM_EXIT_EPR */ + struct { + __u32 epr; + } epr; + +On FSL BookE PowerPC chips, the interrupt controller has a fast patch +interrupt acknowledge path to the core. When the core successfully +delivers an interrupt, it automatically populates the EPR register with +the interrupt vector number and acknowledges the interrupt inside +the interrupt controller. + +In case the interrupt controller lives in user space, we need to do +the interrupt acknowledge cycle through it to fetch the next to be +delivered interrupt vector using this exit. + +It gets triggered whenever both KVM_CAP_PPC_EPR are enabled and an +external interrupt has just been delivered into the guest. User space +should put the acknowledged interrupt vector into the 'epr' field. + /* Fix the size of the union. */ char padding[256]; }; @@ -2501,3 +2520,20 @@ handled in-kernel, while the other I/O instructions are passed to userspace. When this capability is enabled, KVM_EXIT_S390_TSCH will occur on TEST SUBCHANNEL intercepts. + +6.5 KVM_CAP_PPC_EPR + +Architectures: ppc +Parameters: args[0] defines whether the proxy facility is active +Returns: 0 on success; -1 on error + +This capability enables or disables the delivery of interrupts through the +external proxy facility. + +When enabled (args[0] != 0), every time the guest gets an external interrupt +delivered, it automatically exits into user space with a KVM_EXIT_EPR exit +to receive the topmost interrupt vector. + +When disabled (args[0] == 0), behavior is as if this facility is unsupported. + +When this capability is enabled, KVM_EXIT_EPR can occur. diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index ab49c6cf891c..8a72d59467eb 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -520,6 +520,8 @@ struct kvm_vcpu_arch { u8 sane; u8 cpu_type; u8 hcall_needed; + u8 epr_enabled; + u8 epr_needed; u32 cpr0_cfgaddr; /* holds the last set cpr0_cfgaddr */ diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 5f5f69abd281..493630e209c8 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -264,6 +264,15 @@ static inline void kvm_linear_init(void) {} #endif +static inline void kvmppc_set_epr(struct kvm_vcpu *vcpu, u32 epr) +{ +#ifdef CONFIG_KVM_BOOKE_HV + mtspr(SPRN_GEPR, epr); +#elif defined(CONFIG_BOOKE) + vcpu->arch.epr = epr; +#endif +} + int kvm_vcpu_ioctl_config_tlb(struct kvm_vcpu *vcpu, struct kvm_config_tlb *cfg); int kvm_vcpu_ioctl_dirty_tlb(struct kvm_vcpu *vcpu, diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 964f4475f55c..940ec806187e 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -306,7 +306,7 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu, { int allowed = 0; ulong msr_mask = 0; - bool update_esr = false, update_dear = false; + bool update_esr = false, update_dear = false, update_epr = false; ulong crit_raw = vcpu->arch.shared->critical; ulong crit_r1 = kvmppc_get_gpr(vcpu, 1); bool crit; @@ -330,6 +330,9 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu, keep_irq = true; } + if ((priority == BOOKE_IRQPRIO_EXTERNAL) && vcpu->arch.epr_enabled) + update_epr = true; + switch (priority) { case BOOKE_IRQPRIO_DTLB_MISS: case BOOKE_IRQPRIO_DATA_STORAGE: @@ -408,6 +411,8 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu, set_guest_esr(vcpu, vcpu->arch.queued_esr); if (update_dear == true) set_guest_dear(vcpu, vcpu->arch.queued_dear); + if (update_epr == true) + kvm_make_request(KVM_REQ_EPR_EXIT, vcpu); new_msr &= msr_mask; #if defined(CONFIG_64BIT) @@ -615,6 +620,13 @@ int kvmppc_core_check_requests(struct kvm_vcpu *vcpu) r = 0; } + if (kvm_check_request(KVM_REQ_EPR_EXIT, vcpu)) { + vcpu->run->epr.epr = 0; + vcpu->arch.epr_needed = true; + vcpu->run->exit_reason = KVM_EXIT_EPR; + r = 0; + } + return r; } diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index e2225e5b8a4c..934413cd3a1b 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -306,6 +306,7 @@ int kvm_dev_ioctl_check_extension(long ext) #ifdef CONFIG_BOOKE case KVM_CAP_PPC_BOOKE_SREGS: case KVM_CAP_PPC_BOOKE_WATCHDOG: + case KVM_CAP_PPC_EPR: #else case KVM_CAP_PPC_SEGSTATE: case KVM_CAP_PPC_HIOR: @@ -721,6 +722,11 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) for (i = 0; i < 9; ++i) kvmppc_set_gpr(vcpu, 4 + i, run->papr_hcall.args[i]); vcpu->arch.hcall_needed = 0; +#ifdef CONFIG_BOOKE + } else if (vcpu->arch.epr_needed) { + kvmppc_set_epr(vcpu, run->epr.epr); + vcpu->arch.epr_needed = 0; +#endif } r = kvmppc_vcpu_run(run, vcpu); @@ -762,6 +768,10 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu, r = 0; vcpu->arch.papr_enabled = true; break; + case KVM_CAP_PPC_EPR: + r = 0; + vcpu->arch.epr_enabled = cap->args[0]; + break; #ifdef CONFIG_BOOKE case KVM_CAP_PPC_BOOKE_WATCHDOG: r = 0; diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index cbe0d683e2e5..4dd7d7531e69 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -122,6 +122,7 @@ static inline bool is_error_page(struct page *page) #define KVM_REQ_WATCHDOG 18 #define KVM_REQ_MASTERCLOCK_UPDATE 19 #define KVM_REQ_MCLOCK_INPROGRESS 20 +#define KVM_REQ_EPR_EXIT 21 #define KVM_USERSPACE_IRQ_SOURCE_ID 0 #define KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID 1 diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 8bb0bf83afc5..9a2db5767ed5 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -169,6 +169,7 @@ struct kvm_pit_config { #define KVM_EXIT_S390_UCONTROL 20 #define KVM_EXIT_WATCHDOG 21 #define KVM_EXIT_S390_TSCH 22 +#define KVM_EXIT_EPR 23 /* For KVM_EXIT_INTERNAL_ERROR */ /* Emulate instruction failed. */ @@ -295,6 +296,10 @@ struct kvm_run { __u32 ipb; __u8 dequeued; } s390_tsch; + /* KVM_EXIT_EPR */ + struct { + __u32 epr; + } epr; /* Fix the size of the union. */ char padding[256]; }; @@ -656,6 +661,7 @@ struct kvm_ppc_smmu_info { #define KVM_CAP_PPC_BOOKE_WATCHDOG 83 #define KVM_CAP_PPC_HTAB_FD 84 #define KVM_CAP_S390_CSS_SUPPORT 85 +#define KVM_CAP_PPC_EPR 86 #ifdef KVM_CAP_IRQ_ROUTING -- cgit v1.2.3 From dd3332bfcb2223458f553f341d3388cb84040e6a Mon Sep 17 00:00:00 2001 From: YOSHIFUJI Hideaki / 吉藤英明 Date: Sun, 13 Jan 2013 05:02:45 +0000 Subject: ipv6: Store Router Alert option in IP6CB directly. Router Alert option is very small and we can store the value itself in the skb. Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- include/linux/ipv6.h | 3 ++- include/uapi/linux/ipv6.h | 2 ++ net/ipv6/exthdrs.c | 3 ++- net/ipv6/ip6_input.c | 5 ++--- 4 files changed, 8 insertions(+), 5 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 304a9f46b578..e971e3742172 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -84,7 +84,7 @@ static inline struct ipv6hdr *ipipv6_hdr(const struct sk_buff *skb) struct inet6_skb_parm { int iif; - __u16 ra; + __be16 ra; __u16 hop; __u16 dst0; __u16 srcrt; @@ -100,6 +100,7 @@ struct inet6_skb_parm { #define IP6SKB_XFRM_TRANSFORMED 1 #define IP6SKB_FORWARDED 2 #define IP6SKB_REROUTED 4 +#define IP6SKB_ROUTERALERT 8 }; #define IP6CB(skb) ((struct inet6_skb_parm*)((skb)->cb)) diff --git a/include/uapi/linux/ipv6.h b/include/uapi/linux/ipv6.h index 5a2991cf0251..4bda4cf5b0f5 100644 --- a/include/uapi/linux/ipv6.h +++ b/include/uapi/linux/ipv6.h @@ -63,6 +63,8 @@ struct ipv6_opt_hdr { #define ipv6_destopt_hdr ipv6_opt_hdr #define ipv6_hopopt_hdr ipv6_opt_hdr +/* Router Alert option values (RFC2711) */ +#define IPV6_OPT_ROUTERALERT_MLD 0x0000 /* MLD(RFC2710) */ /* * routing header type 0 (used in cmsghdr struct) diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index 473f628f9f20..07a7d65a7cb6 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -553,7 +553,8 @@ static bool ipv6_hop_ra(struct sk_buff *skb, int optoff) const unsigned char *nh = skb_network_header(skb); if (nh[optoff + 1] == 2) { - IP6CB(skb)->ra = optoff; + IP6CB(skb)->flags |= IP6SKB_ROUTERALERT; + memcpy(&IP6CB(skb)->ra, nh + optoff + 2, sizeof(IP6CB(skb)->ra)); return true; } LIMIT_NETDEBUG(KERN_DEBUG "ipv6_hop_ra: wrong RA length %d\n", diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index 2ccd35ec3628..4ac5bf30e16a 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -280,9 +280,8 @@ int ip6_mc_input(struct sk_buff *skb) struct inet6_skb_parm *opt = IP6CB(skb); /* Check for MLD */ - if (unlikely(opt->ra)) { + if (unlikely(opt->flags & IP6SKB_ROUTERALERT)) { /* Check if this is a mld message */ - u8 *ptr = skb_network_header(skb) + opt->ra; u8 nexthdr = hdr->nexthdr; __be16 frag_off; int offset; @@ -290,7 +289,7 @@ int ip6_mc_input(struct sk_buff *skb) /* Check if the value of Router Alert * is for MLD (0x0000). */ - if ((ptr[2] | ptr[3]) == 0) { + if (opt->ra == htons(IPV6_OPT_ROUTERALERT_MLD)) { deliver = false; if (!ipv6_ext_hdr(nexthdr)) { -- cgit v1.2.3 From 25d46f43a911b08c5aa8c8fd4fe7fa9b36445068 Mon Sep 17 00:00:00 2001 From: YOSHIFUJI Hideaki / 吉藤英明 Date: Sun, 13 Jan 2013 16:02:06 +0000 Subject: ipv6: Move comment to right place. IN6ADDR_* and in6addr_* are not exported to userspace, and are defined in include/linux/in6.h. Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- include/linux/in6.h | 4 ++++ include/uapi/linux/in6.h | 5 ----- 2 files changed, 4 insertions(+), 5 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/linux/in6.h b/include/linux/in6.h index 9e2ae26fb598..a16e19349ec0 100644 --- a/include/linux/in6.h +++ b/include/linux/in6.h @@ -22,6 +22,10 @@ #include +/* IPv6 Wildcard Address (::) and Loopback Address (::1) defined in RFC2553 + * NOTE: Be aware the IN6ADDR_* constants and in6addr_* externals are defined + * in network byte order, not in host byte order as are the IPv4 equivalents + */ extern const struct in6_addr in6addr_any; #define IN6ADDR_ANY_INIT { { { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } } } extern const struct in6_addr in6addr_loopback; diff --git a/include/uapi/linux/in6.h b/include/uapi/linux/in6.h index f79c3721da6e..5673b97dcf54 100644 --- a/include/uapi/linux/in6.h +++ b/include/uapi/linux/in6.h @@ -38,11 +38,6 @@ struct in6_addr { #define s6_addr32 in6_u.u6_addr32 }; -/* IPv6 Wildcard Address (::) and Loopback Address (::1) defined in RFC2553 - * NOTE: Be aware the IN6ADDR_* constants and in6addr_* externals are defined - * in network byte order, not in host byte order as are the IPv4 equivalents - */ - struct sockaddr_in6 { unsigned short int sin6_family; /* AF_INET6 */ __be16 sin6_port; /* Transport layer port # */ -- cgit v1.2.3 From d3710e74cf329839dea8d13b1ad56e572b06b173 Mon Sep 17 00:00:00 2001 From: Lauro Ramos Venancio Date: Wed, 5 Dec 2012 21:12:25 -0300 Subject: NFC: Change nfc.h license nfc.h being GPL makes it quite controversial for non GPL applications to include it. Moreover, nfc.h only includes structures and API definitions that are hardly copyrightable. Signed-off-by: Lauro Ramos Venancio Signed-off-by: Samuel Ortiz --- include/uapi/linux/nfc.h | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nfc.h b/include/uapi/linux/nfc.h index 80e4ecd8c04c..7969f46f1bb3 100644 --- a/include/uapi/linux/nfc.h +++ b/include/uapi/linux/nfc.h @@ -5,20 +5,17 @@ * Lauro Ramos Venancio * Aloisio Almeida Jr * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the - * Free Software Foundation, Inc., - * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ #ifndef __LINUX_NFC_H -- cgit v1.2.3 From 3b1c5a5307fb5277f395efdcf330c064d79df07d Mon Sep 17 00:00:00 2001 From: Marco Porsch Date: Mon, 7 Jan 2013 16:04:52 +0100 Subject: {cfg,nl}80211: mesh power mode primitives and userspace access Add the nl80211_mesh_power_mode enumeration which holds possible values for the mesh power mode. These modes are unknown, active, light sleep and deep sleep. Add power_mode entry to the mesh config structure to hold the user-configured default mesh power mode. This value will be used for new peer links. Add the dot11MeshAwakeWindowDuration value to the mesh config. The awake window is a duration in TU describing how long the STA will stay awake after transmitting its beacon in PS mode. Add access routines to: - get/set local link-specific power mode (STA) - get remote STA's link-specific power mode (STA) - get remote STA's non-peer power mode (STA) - get/set default mesh power mode (mesh config) - get/set mesh awake window duration (mesh config) All config changes may be done at mesh runtime and take effect immediately. Signed-off-by: Marco Porsch Signed-off-by: Ivan Bezyazychnyy Signed-off-by: Mike Krinkin [fix commit message line length, error handling in set station] Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 21 ++++++++++++++++++++ include/uapi/linux/nl80211.h | 47 ++++++++++++++++++++++++++++++++++++++++++++ net/wireless/mesh.c | 3 +++ net/wireless/nl80211.c | 43 +++++++++++++++++++++++++++++++++++++++- 4 files changed, 113 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 516aded3697f..d9f08f65f7a5 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -610,6 +610,8 @@ enum station_parameters_apply_mask { * @sta_modify_mask: bitmap indicating which parameters changed * (for those that don't have a natural "no change" value), * see &enum station_parameters_apply_mask + * @local_pm: local link-specific mesh power save mode (no change when set + * to unknown) */ struct station_parameters { u8 *supported_rates; @@ -625,6 +627,7 @@ struct station_parameters { struct ieee80211_vht_cap *vht_capa; u8 uapsd_queues; u8 max_sp; + enum nl80211_mesh_power_mode local_pm; }; /** @@ -655,6 +658,9 @@ struct station_parameters { * @STATION_INFO_STA_FLAGS: @sta_flags filled * @STATION_INFO_BEACON_LOSS_COUNT: @beacon_loss_count filled * @STATION_INFO_T_OFFSET: @t_offset filled + * @STATION_INFO_LOCAL_PM: @local_pm filled + * @STATION_INFO_PEER_PM: @peer_pm filled + * @STATION_INFO_NONPEER_PM: @nonpeer_pm filled */ enum station_info_flags { STATION_INFO_INACTIVE_TIME = 1<<0, @@ -678,6 +684,9 @@ enum station_info_flags { STATION_INFO_STA_FLAGS = 1<<18, STATION_INFO_BEACON_LOSS_COUNT = 1<<19, STATION_INFO_T_OFFSET = 1<<20, + STATION_INFO_LOCAL_PM = 1<<21, + STATION_INFO_PEER_PM = 1<<22, + STATION_INFO_NONPEER_PM = 1<<23, }; /** @@ -791,6 +800,9 @@ struct sta_bss_parameters { * @sta_flags: station flags mask & values * @beacon_loss_count: Number of times beacon loss event has triggered. * @t_offset: Time offset of the station relative to this host. + * @local_pm: local mesh STA power save mode + * @peer_pm: peer mesh STA power save mode + * @nonpeer_pm: non-peer mesh STA power save mode */ struct station_info { u32 filled; @@ -820,6 +832,9 @@ struct station_info { u32 beacon_loss_count; s64 t_offset; + enum nl80211_mesh_power_mode local_pm; + enum nl80211_mesh_power_mode peer_pm; + enum nl80211_mesh_power_mode nonpeer_pm; /* * Note: Add a new enum station_info_flags value for each new field and @@ -995,6 +1010,10 @@ struct bss_parameters { * @dot11MeshHWMPconfirmationInterval: The minimum interval of time (in TUs) * during which a mesh STA can send only one Action frame containing * a PREQ element for root path confirmation. + * @power_mode: The default mesh power save mode which will be the initial + * setting for new peer links. + * @dot11MeshAwakeWindowDuration: The duration in TUs the STA will remain awake + * after transmitting its beacon. */ struct mesh_config { u16 dot11MeshRetryTimeout; @@ -1022,6 +1041,8 @@ struct mesh_config { u32 dot11MeshHWMPactivePathToRootTimeout; u16 dot11MeshHWMProotInterval; u16 dot11MeshHWMPconfirmationInterval; + enum nl80211_mesh_power_mode power_mode; + u16 dot11MeshAwakeWindowDuration; }; /** diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 547017100a30..6c4f703ae890 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -1310,6 +1310,9 @@ enum nl80211_commands { * if not given in START_AP 0 is assumed, if not given in SET_BSS * no change is made. * + * @NL80211_ATTR_LOCAL_MESH_POWER_MODE: local mesh STA link-specific power mode + * defined in &enum nl80211_mesh_power_mode. + * * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use */ @@ -1580,6 +1583,8 @@ enum nl80211_attrs { NL80211_ATTR_P2P_CTWINDOW, NL80211_ATTR_P2P_OPPPS, + NL80211_ATTR_LOCAL_MESH_POWER_MODE, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, @@ -1838,6 +1843,10 @@ enum nl80211_sta_bss_param { * @NL80211_STA_INFO_STA_FLAGS: Contains a struct nl80211_sta_flag_update. * @NL80211_STA_INFO_BEACON_LOSS: count of times beacon loss was detected (u32) * @NL80211_STA_INFO_T_OFFSET: timing offset with respect to this STA (s64) + * @NL80211_STA_INFO_LOCAL_PM: local mesh STA link-specific power mode + * @NL80211_STA_INFO_PEER_PM: peer mesh STA link-specific power mode + * @NL80211_STA_INFO_NONPEER_PM: neighbor mesh STA power save mode towards + * non-peer STA * @__NL80211_STA_INFO_AFTER_LAST: internal * @NL80211_STA_INFO_MAX: highest possible station info attribute */ @@ -1862,6 +1871,9 @@ enum nl80211_sta_info { NL80211_STA_INFO_STA_FLAGS, NL80211_STA_INFO_BEACON_LOSS, NL80211_STA_INFO_T_OFFSET, + NL80211_STA_INFO_LOCAL_PM, + NL80211_STA_INFO_PEER_PM, + NL80211_STA_INFO_NONPEER_PM, /* keep last */ __NL80211_STA_INFO_AFTER_LAST, @@ -2252,6 +2264,34 @@ enum nl80211_mntr_flags { NL80211_MNTR_FLAG_MAX = __NL80211_MNTR_FLAG_AFTER_LAST - 1 }; +/** + * enum nl80211_mesh_power_mode - mesh power save modes + * + * @NL80211_MESH_POWER_UNKNOWN: The mesh power mode of the mesh STA is + * not known or has not been set yet. + * @NL80211_MESH_POWER_ACTIVE: Active mesh power mode. The mesh STA is + * in Awake state all the time. + * @NL80211_MESH_POWER_LIGHT_SLEEP: Light sleep mode. The mesh STA will + * alternate between Active and Doze states, but will wake up for + * neighbor's beacons. + * @NL80211_MESH_POWER_DEEP_SLEEP: Deep sleep mode. The mesh STA will + * alternate between Active and Doze states, but may not wake up + * for neighbor's beacons. + * + * @__NL80211_MESH_POWER_AFTER_LAST - internal use + * @NL80211_MESH_POWER_MAX - highest possible power save level + */ + +enum nl80211_mesh_power_mode { + NL80211_MESH_POWER_UNKNOWN, + NL80211_MESH_POWER_ACTIVE, + NL80211_MESH_POWER_LIGHT_SLEEP, + NL80211_MESH_POWER_DEEP_SLEEP, + + __NL80211_MESH_POWER_AFTER_LAST, + NL80211_MESH_POWER_MAX = __NL80211_MESH_POWER_AFTER_LAST - 1 +}; + /** * enum nl80211_meshconf_params - mesh configuration parameters * @@ -2346,6 +2386,11 @@ enum nl80211_mntr_flags { * (in TUs) during which a mesh STA can send only one Action frame * containing a PREQ element for root path confirmation. * + * @NL80211_MESHCONF_POWER_MODE: Default mesh power mode for new peer links. + * type &enum nl80211_mesh_power_mode (u32) + * + * @NL80211_MESHCONF_AWAKE_WINDOW: awake window duration (in TUs) + * * @__NL80211_MESHCONF_ATTR_AFTER_LAST: internal use */ enum nl80211_meshconf_params { @@ -2375,6 +2420,8 @@ enum nl80211_meshconf_params { NL80211_MESHCONF_HWMP_PATH_TO_ROOT_TIMEOUT, NL80211_MESHCONF_HWMP_ROOT_INTERVAL, NL80211_MESHCONF_HWMP_CONFIRMATION_INTERVAL, + NL80211_MESHCONF_POWER_MODE, + NL80211_MESHCONF_AWAKE_WINDOW, /* keep last */ __NL80211_MESHCONF_ATTR_AFTER_LAST, diff --git a/net/wireless/mesh.c b/net/wireless/mesh.c index 0fe8ceb5444e..55957a284f6c 100644 --- a/net/wireless/mesh.c +++ b/net/wireless/mesh.c @@ -46,6 +46,7 @@ #define MESH_DEFAULT_BEACON_INTERVAL 1000 /* in 1024 us units (=TUs) */ #define MESH_DEFAULT_DTIM_PERIOD 2 +#define MESH_DEFAULT_AWAKE_WINDOW 10 /* in 1024 us units (=TUs) */ const struct mesh_config default_mesh_config = { .dot11MeshRetryTimeout = MESH_RET_T, @@ -72,6 +73,8 @@ const struct mesh_config default_mesh_config = { .dot11MeshHWMPactivePathToRootTimeout = MESH_PATH_TO_ROOT_TIMEOUT, .dot11MeshHWMProotInterval = MESH_ROOT_INTERVAL, .dot11MeshHWMPconfirmationInterval = MESH_ROOT_CONFIRMATION_INTERVAL, + .power_mode = NL80211_MESH_POWER_ACTIVE, + .dot11MeshAwakeWindowDuration = MESH_DEFAULT_AWAKE_WINDOW, }; const struct mesh_setup default_mesh_setup = { diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index d5842eb35aec..1a7a710fe9bf 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -3001,6 +3001,18 @@ static int nl80211_send_station(struct sk_buff *msg, u32 portid, u32 seq, nla_put_u32(msg, NL80211_STA_INFO_BEACON_LOSS, sinfo->beacon_loss_count)) goto nla_put_failure; + if ((sinfo->filled & STATION_INFO_LOCAL_PM) && + nla_put_u32(msg, NL80211_STA_INFO_LOCAL_PM, + sinfo->local_pm)) + goto nla_put_failure; + if ((sinfo->filled & STATION_INFO_PEER_PM) && + nla_put_u32(msg, NL80211_STA_INFO_PEER_PM, + sinfo->peer_pm)) + goto nla_put_failure; + if ((sinfo->filled & STATION_INFO_NONPEER_PM) && + nla_put_u32(msg, NL80211_STA_INFO_NONPEER_PM, + sinfo->nonpeer_pm)) + goto nla_put_failure; if (sinfo->filled & STATION_INFO_BSS_PARAM) { bss_param = nla_nest_start(msg, NL80211_STA_INFO_BSS_PARAM); if (!bss_param) @@ -3206,6 +3218,17 @@ static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info) params.plink_state = nla_get_u8(info->attrs[NL80211_ATTR_STA_PLINK_STATE]); + if (info->attrs[NL80211_ATTR_LOCAL_MESH_POWER_MODE]) { + enum nl80211_mesh_power_mode pm = nla_get_u32( + info->attrs[NL80211_ATTR_LOCAL_MESH_POWER_MODE]); + + if (pm <= NL80211_MESH_POWER_UNKNOWN || + pm > NL80211_MESH_POWER_MAX) + return -EINVAL; + + params.local_pm = pm; + } + switch (dev->ieee80211_ptr->iftype) { case NL80211_IFTYPE_AP: case NL80211_IFTYPE_AP_VLAN: @@ -3213,6 +3236,8 @@ static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info) /* disallow mesh-specific things */ if (params.plink_action) return -EINVAL; + if (params.local_pm) + return -EINVAL; /* TDLS can't be set, ... */ if (params.sta_flags_set & BIT(NL80211_STA_FLAG_TDLS_PEER)) @@ -3265,6 +3290,8 @@ static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info) /* disallow things sta doesn't support */ if (params.plink_action) return -EINVAL; + if (params.local_pm) + return -EINVAL; /* reject any changes other than AUTHORIZED */ if (params.sta_flags_mask & ~BIT(NL80211_STA_FLAG_AUTHORIZED)) return -EINVAL; @@ -3922,7 +3949,11 @@ static int nl80211_get_mesh_config(struct sk_buff *skb, nla_put_u16(msg, NL80211_MESHCONF_HWMP_ROOT_INTERVAL, cur_params.dot11MeshHWMProotInterval) || nla_put_u16(msg, NL80211_MESHCONF_HWMP_CONFIRMATION_INTERVAL, - cur_params.dot11MeshHWMPconfirmationInterval)) + cur_params.dot11MeshHWMPconfirmationInterval) || + nla_put_u32(msg, NL80211_MESHCONF_POWER_MODE, + cur_params.power_mode) || + nla_put_u16(msg, NL80211_MESHCONF_AWAKE_WINDOW, + cur_params.dot11MeshAwakeWindowDuration)) goto nla_put_failure; nla_nest_end(msg, pinfoattr); genlmsg_end(msg, hdr); @@ -3961,6 +3992,8 @@ static const struct nla_policy nl80211_meshconf_params_policy[NL80211_MESHCONF_A [NL80211_MESHCONF_HWMP_PATH_TO_ROOT_TIMEOUT] = { .type = NLA_U32 }, [NL80211_MESHCONF_HWMP_ROOT_INTERVAL] = { .type = NLA_U16 }, [NL80211_MESHCONF_HWMP_CONFIRMATION_INTERVAL] = { .type = NLA_U16 }, + [NL80211_MESHCONF_POWER_MODE] = { .type = NLA_U32 }, + [NL80211_MESHCONF_AWAKE_WINDOW] = { .type = NLA_U16 }, }; static const struct nla_policy @@ -4088,6 +4121,14 @@ do { \ 1, 65535, mask, NL80211_MESHCONF_HWMP_CONFIRMATION_INTERVAL, nla_get_u16); + FILL_IN_MESH_PARAM_IF_SET(tb, cfg, power_mode, + NL80211_MESH_POWER_ACTIVE, + NL80211_MESH_POWER_MAX, + mask, NL80211_MESHCONF_POWER_MODE, + nla_get_u32); + FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshAwakeWindowDuration, + 0, 65535, mask, + NL80211_MESHCONF_AWAKE_WINDOW, nla_get_u16); if (mask_out) *mask_out = mask; -- cgit v1.2.3 From cee00a959c0a86571e6f99cf42f0261d7e54d2ae Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Tue, 15 Jan 2013 17:15:57 +0200 Subject: cfg80211: Allow use_mfp to be specified with the connect command The NL80211_ATTR_USE_MFP attribute was originally added for NL80211_CMD_ASSOCIATE, but it is actually as useful (if not even more useful) with NL80211_CMD_CONNECT, so process that attribute with the connect command, too. Signed-off-by: Jouni Malinen Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 2 ++ include/uapi/linux/nl80211.h | 6 +++--- net/wireless/nl80211.c | 9 +++++++++ net/wireless/sme.c | 3 ++- 4 files changed, 16 insertions(+), 4 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index d81e730962cc..f1686d460e6b 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1465,6 +1465,7 @@ struct cfg80211_ibss_params { * @ie: IEs for association request * @ie_len: Length of assoc_ie in octets * @privacy: indicates whether privacy-enabled APs should be used + * @mfp: indicate whether management frame protection is used * @crypto: crypto settings * @key_len: length of WEP key for shared key authentication * @key_idx: index of WEP key for shared key authentication @@ -1485,6 +1486,7 @@ struct cfg80211_connect_params { u8 *ie; size_t ie_len; bool privacy; + enum nl80211_mfp mfp; struct cfg80211_crypto_settings crypto; const u8 *key; u8 key_len, key_idx; diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 6c4f703ae890..d01c16220dc5 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -374,8 +374,8 @@ * requests to connect to a specified network but without separating * auth and assoc steps. For this, you need to specify the SSID in a * %NL80211_ATTR_SSID attribute, and can optionally specify the association - * IEs in %NL80211_ATTR_IE, %NL80211_ATTR_AUTH_TYPE, %NL80211_ATTR_MAC, - * %NL80211_ATTR_WIPHY_FREQ, %NL80211_ATTR_CONTROL_PORT, + * IEs in %NL80211_ATTR_IE, %NL80211_ATTR_AUTH_TYPE, %NL80211_ATTR_USE_MFP, + * %NL80211_ATTR_MAC, %NL80211_ATTR_WIPHY_FREQ, %NL80211_ATTR_CONTROL_PORT, * %NL80211_ATTR_CONTROL_PORT_ETHERTYPE and * %NL80211_ATTR_CONTROL_PORT_NO_ENCRYPT. * Background scan period can optionally be @@ -958,7 +958,7 @@ enum nl80211_commands { * @NL80211_ATTR_USE_MFP: Whether management frame protection (IEEE 802.11w) is * used for the association (&enum nl80211_mfp, represented as a u32); * this attribute can be used - * with %NL80211_CMD_ASSOCIATE request + * with %NL80211_CMD_ASSOCIATE and %NL80211_CMD_CONNECT requests * * @NL80211_ATTR_STA_FLAGS2: Attribute containing a * &struct nl80211_sta_flag_update. diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index d543cf152100..df82a5c9faee 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -5932,6 +5932,15 @@ static int nl80211_connect(struct sk_buff *skb, struct genl_info *info) connect.ie_len = nla_len(info->attrs[NL80211_ATTR_IE]); } + if (info->attrs[NL80211_ATTR_USE_MFP]) { + connect.mfp = nla_get_u32(info->attrs[NL80211_ATTR_USE_MFP]); + if (connect.mfp != NL80211_MFP_REQUIRED && + connect.mfp != NL80211_MFP_NO) + return -EINVAL; + } else { + connect.mfp = NL80211_MFP_NO; + } + if (info->attrs[NL80211_ATTR_WIPHY_FREQ]) { connect.channel = ieee80211_get_channel(wiphy, diff --git a/net/wireless/sme.c b/net/wireless/sme.c index d2d26518cdd7..a825dfe12cf7 100644 --- a/net/wireless/sme.c +++ b/net/wireless/sme.c @@ -192,7 +192,8 @@ static int cfg80211_conn_do_work(struct wireless_dev *wdev) prev_bssid, params->ssid, params->ssid_len, params->ie, params->ie_len, - false, ¶ms->crypto, + params->mfp != NL80211_MFP_NO, + ¶ms->crypto, params->flags, ¶ms->ht_capa, ¶ms->ht_capa_mask); if (err) -- cgit v1.2.3 From 11c4a075db2f8774d37544342c8cb9752b4db9e1 Mon Sep 17 00:00:00 2001 From: Simon Wunderlich Date: Tue, 8 Jan 2013 14:04:07 +0100 Subject: cfg80211: check radar interface combinations To ease further DFS development regarding interface combinations, use the interface combinations structure to test for radar capabilities. Drivers can specify which channel widths they support, and in which modes. Right now only a single AP interface is allowed, but as the DFS code evolves other combinations can be enabled. Signed-off-by: Simon Wunderlich Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 2 ++ include/uapi/linux/nl80211.h | 3 +++ net/wireless/core.c | 12 ++++++++++-- net/wireless/core.h | 7 ++++--- net/wireless/nl80211.c | 3 +++ net/wireless/util.c | 41 ++++++++++++++++++++++++++++++++++++++--- 6 files changed, 60 insertions(+), 8 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index f1686d460e6b..970da4420676 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -2125,6 +2125,7 @@ struct ieee80211_iface_limit { * @beacon_int_infra_match: In this combination, the beacon intervals * between infrastructure and AP types must match. This is required * only in special cases. + * @radar_detect_widths: bitmap of channel widths supported for radar detection * * These examples can be expressed as follows: * @@ -2177,6 +2178,7 @@ struct ieee80211_iface_combination { u16 max_interfaces; u8 n_limits; bool beacon_int_infra_match; + u8 radar_detect_widths; }; struct mac_address { diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index d01c16220dc5..e6eeb4ba5dc5 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -2984,6 +2984,8 @@ enum nl80211_iface_limit_attrs { * the infrastructure network's beacon interval. * @NL80211_IFACE_COMB_NUM_CHANNELS: u32 attribute specifying how many * different channels may be used within this group. + * @NL80211_IFACE_COMB_RADAR_DETECT_WIDTHS: u32 attribute containing the bitmap + * of supported channel widths for radar detection. * @NUM_NL80211_IFACE_COMB: number of attributes * @MAX_NL80211_IFACE_COMB: highest attribute number * @@ -3016,6 +3018,7 @@ enum nl80211_if_combination_attrs { NL80211_IFACE_COMB_MAXNUM, NL80211_IFACE_COMB_STA_AP_BI_MATCH, NL80211_IFACE_COMB_NUM_CHANNELS, + NL80211_IFACE_COMB_RADAR_DETECT_WIDTHS, /* keep last */ NUM_NL80211_IFACE_COMB, diff --git a/net/wireless/core.c b/net/wireless/core.c index 747dd9365a44..0e702cdc6043 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -382,8 +382,11 @@ static int wiphy_verify_combinations(struct wiphy *wiphy) c = &wiphy->iface_combinations[i]; - /* Combinations with just one interface aren't real */ - if (WARN_ON(c->max_interfaces < 2)) + /* + * Combinations with just one interface aren't real, + * however we make an exception for DFS. + */ + if (WARN_ON((c->max_interfaces < 2) && !c->radar_detect_widths)) return -EINVAL; /* Need at least one channel */ @@ -398,6 +401,11 @@ static int wiphy_verify_combinations(struct wiphy *wiphy) CFG80211_MAX_NUM_DIFFERENT_CHANNELS)) return -EINVAL; + /* DFS only works on one channel. */ + if (WARN_ON(c->radar_detect_widths && + (c->num_different_channels > 1))) + return -EINVAL; + if (WARN_ON(!c->n_limits)) return -EINVAL; diff --git a/net/wireless/core.h b/net/wireless/core.h index f342267e3620..8396f7671c8d 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -425,7 +425,8 @@ int cfg80211_can_use_iftype_chan(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev, enum nl80211_iftype iftype, struct ieee80211_channel *chan, - enum cfg80211_chan_mode chanmode); + enum cfg80211_chan_mode chanmode, + u8 radar_detect); static inline int cfg80211_can_change_interface(struct cfg80211_registered_device *rdev, @@ -433,7 +434,7 @@ cfg80211_can_change_interface(struct cfg80211_registered_device *rdev, enum nl80211_iftype iftype) { return cfg80211_can_use_iftype_chan(rdev, wdev, iftype, NULL, - CHAN_MODE_UNDEFINED); + CHAN_MODE_UNDEFINED, 0); } static inline int @@ -450,7 +451,7 @@ cfg80211_can_use_chan(struct cfg80211_registered_device *rdev, enum cfg80211_chan_mode chanmode) { return cfg80211_can_use_iftype_chan(rdev, wdev, wdev->iftype, - chan, chanmode); + chan, chanmode, 0); } void diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index df82a5c9faee..33de80364c5c 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -856,6 +856,9 @@ static int nl80211_put_iface_combinations(struct wiphy *wiphy, nla_put_u32(msg, NL80211_IFACE_COMB_MAXNUM, c->max_interfaces)) goto nla_put_failure; + if (nla_put_u32(msg, NL80211_IFACE_COMB_RADAR_DETECT_WIDTHS, + c->radar_detect_widths)) + goto nla_put_failure; nla_nest_end(msg, nl_combi); } diff --git a/net/wireless/util.c b/net/wireless/util.c index 16d76a807c2f..1c2795d52db0 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -1184,7 +1184,8 @@ int cfg80211_can_use_iftype_chan(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev, enum nl80211_iftype iftype, struct ieee80211_channel *chan, - enum cfg80211_chan_mode chanmode) + enum cfg80211_chan_mode chanmode, + u8 radar_detect) { struct wireless_dev *wdev_iter; u32 used_iftypes = BIT(iftype); @@ -1195,14 +1196,45 @@ int cfg80211_can_use_iftype_chan(struct cfg80211_registered_device *rdev, enum cfg80211_chan_mode chmode; int num_different_channels = 0; int total = 1; + bool radar_required; int i, j; ASSERT_RTNL(); lockdep_assert_held(&rdev->devlist_mtx); + if (WARN_ON(hweight32(radar_detect) > 1)) + return -EINVAL; + + switch (iftype) { + case NL80211_IFTYPE_ADHOC: + case NL80211_IFTYPE_AP: + case NL80211_IFTYPE_AP_VLAN: + case NL80211_IFTYPE_MESH_POINT: + case NL80211_IFTYPE_P2P_GO: + case NL80211_IFTYPE_WDS: + radar_required = !!(chan->flags & IEEE80211_CHAN_RADAR); + break; + case NL80211_IFTYPE_P2P_CLIENT: + case NL80211_IFTYPE_STATION: + case NL80211_IFTYPE_MONITOR: + radar_required = false; + break; + case NL80211_IFTYPE_P2P_DEVICE: + case NUM_NL80211_IFTYPES: + case NL80211_IFTYPE_UNSPECIFIED: + default: + return -EINVAL; + } + + if (radar_required && !radar_detect) + return -EINVAL; + /* Always allow software iftypes */ - if (rdev->wiphy.software_iftypes & BIT(iftype)) + if (rdev->wiphy.software_iftypes & BIT(iftype)) { + if (radar_detect) + return -EINVAL; return 0; + } memset(num, 0, sizeof(num)); memset(used_channels, 0, sizeof(used_channels)); @@ -1275,7 +1307,7 @@ int cfg80211_can_use_iftype_chan(struct cfg80211_registered_device *rdev, used_iftypes |= BIT(wdev_iter->iftype); } - if (total == 1) + if (total == 1 && !radar_detect) return 0; for (i = 0; i < rdev->wiphy.n_iface_combinations; i++) { @@ -1308,6 +1340,9 @@ int cfg80211_can_use_iftype_chan(struct cfg80211_registered_device *rdev, } } + if (radar_detect && !(c->radar_detect_widths & radar_detect)) + goto cont; + /* * Finally check that all iftypes that we're currently * using are actually part of this combination. If they -- cgit v1.2.3 From c539f01717c239cfa0921dd43927afc976f1eedc Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 11 Jan 2013 06:30:44 +0000 Subject: netfilter: add connlabel conntrack extension similar to connmarks, except labels are bit-based; i.e. all labels may be attached to a flow at the same time. Up to 128 labels are supported. Supporting more labels is possible, but requires increasing the ct offset delta from u8 to u16 type due to increased extension sizes. Mapping of bit-identifier to label name is done in userspace. The extension is enabled at run-time once "-m connlabel" netfilter rules are added. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_extend.h | 4 ++ include/net/netfilter/nf_conntrack_labels.h | 55 ++++++++++++++++ include/net/netns/conntrack.h | 4 ++ include/uapi/linux/netfilter/xt_connlabel.h | 12 ++++ net/netfilter/Kconfig | 18 ++++++ net/netfilter/Makefile | 2 + net/netfilter/nf_conntrack_core.c | 12 ++++ net/netfilter/nf_conntrack_labels.c | 72 +++++++++++++++++++++ net/netfilter/nf_conntrack_netlink.c | 3 + net/netfilter/xt_connlabel.c | 99 +++++++++++++++++++++++++++++ 10 files changed, 281 insertions(+) create mode 100644 include/net/netfilter/nf_conntrack_labels.h create mode 100644 include/uapi/linux/netfilter/xt_connlabel.h create mode 100644 net/netfilter/nf_conntrack_labels.c create mode 100644 net/netfilter/xt_connlabel.c (limited to 'include/uapi/linux') diff --git a/include/net/netfilter/nf_conntrack_extend.h b/include/net/netfilter/nf_conntrack_extend.h index 8b4d1fc29096..977bc8a46444 100644 --- a/include/net/netfilter/nf_conntrack_extend.h +++ b/include/net/netfilter/nf_conntrack_extend.h @@ -22,6 +22,9 @@ enum nf_ct_ext_id { #endif #ifdef CONFIG_NF_CONNTRACK_TIMEOUT NF_CT_EXT_TIMEOUT, +#endif +#ifdef CONFIG_NF_CONNTRACK_LABELS + NF_CT_EXT_LABELS, #endif NF_CT_EXT_NUM, }; @@ -33,6 +36,7 @@ enum nf_ct_ext_id { #define NF_CT_EXT_ZONE_TYPE struct nf_conntrack_zone #define NF_CT_EXT_TSTAMP_TYPE struct nf_conn_tstamp #define NF_CT_EXT_TIMEOUT_TYPE struct nf_conn_timeout +#define NF_CT_EXT_LABELS_TYPE struct nf_conn_labels /* Extensions: optional stuff which isn't permanently in struct. */ struct nf_ct_ext { diff --git a/include/net/netfilter/nf_conntrack_labels.h b/include/net/netfilter/nf_conntrack_labels.h new file mode 100644 index 000000000000..b94fe31c7b39 --- /dev/null +++ b/include/net/netfilter/nf_conntrack_labels.h @@ -0,0 +1,55 @@ +#include +#include +#include +#include +#include +#include + +#include + +struct nf_conn_labels { + u8 words; + unsigned long bits[]; +}; + +static inline struct nf_conn_labels *nf_ct_labels_find(const struct nf_conn *ct) +{ +#ifdef CONFIG_NF_CONNTRACK_LABELS + return nf_ct_ext_find(ct, NF_CT_EXT_LABELS); +#else + return NULL; +#endif +} + +static inline struct nf_conn_labels *nf_ct_labels_ext_add(struct nf_conn *ct) +{ +#ifdef CONFIG_NF_CONNTRACK_LABELS + struct nf_conn_labels *cl_ext; + struct net *net = nf_ct_net(ct); + u8 words; + + words = ACCESS_ONCE(net->ct.label_words); + if (words == 0 || WARN_ON_ONCE(words > 8)) + return NULL; + + cl_ext = nf_ct_ext_add_length(ct, NF_CT_EXT_LABELS, + words * sizeof(long), GFP_ATOMIC); + if (cl_ext != NULL) + cl_ext->words = words; + + return cl_ext; +#else + return NULL; +#endif +} + +bool nf_connlabel_match(const struct nf_conn *ct, u16 bit); +int nf_connlabel_set(struct nf_conn *ct, u16 bit); + +#ifdef CONFIG_NF_CONNTRACK_LABELS +int nf_conntrack_labels_init(struct net *net); +void nf_conntrack_labels_fini(struct net *net); +#else +static inline int nf_conntrack_labels_init(struct net *n) { return 0; } +static inline void nf_conntrack_labels_fini(struct net *net) {} +#endif diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h index 923cb20051ed..c9c0c538b68b 100644 --- a/include/net/netns/conntrack.h +++ b/include/net/netns/conntrack.h @@ -84,6 +84,10 @@ struct netns_ct { int sysctl_auto_assign_helper; bool auto_assign_helper_warned; struct nf_ip_net nf_ct_proto; +#if defined(CONFIG_NF_CONNTRACK_LABELS) + unsigned int labels_used; + u8 label_words; +#endif #ifdef CONFIG_NF_NAT_NEEDED struct hlist_head *nat_bysource; unsigned int nat_htable_size; diff --git a/include/uapi/linux/netfilter/xt_connlabel.h b/include/uapi/linux/netfilter/xt_connlabel.h new file mode 100644 index 000000000000..c4bc9ee9b330 --- /dev/null +++ b/include/uapi/linux/netfilter/xt_connlabel.h @@ -0,0 +1,12 @@ +#include + +#define XT_CONNLABEL_MAXBIT 127 +enum xt_connlabel_mtopts { + XT_CONNLABEL_OP_INVERT = 1 << 0, + XT_CONNLABEL_OP_SET = 1 << 1, +}; + +struct xt_connlabel_mtinfo { + __u16 bit; + __u16 options; +}; diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 49e96df5fbc4..bb48607d4ee4 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -124,6 +124,12 @@ config NF_CONNTRACK_TIMESTAMP If unsure, say `N'. +config NF_CONNTRACK_LABELS + bool + help + This option enables support for assigning user-defined flag bits + to connection tracking entries. It selected by the connlabel match. + config NF_CT_PROTO_DCCP tristate 'DCCP protocol connection tracking support (EXPERIMENTAL)' depends on EXPERIMENTAL @@ -842,6 +848,18 @@ config NETFILTER_XT_MATCH_CONNBYTES If you want to compile it as a module, say M here and read . If unsure, say `N'. +config NETFILTER_XT_MATCH_CONNLABEL + tristate '"connlabel" match support' + select NF_CONNTRACK_LABELS + depends on NETFILTER_ADVANCED + ---help--- + This match allows you to test and assign userspace-defined labels names + to a connection. The kernel only stores bit values - mapping + names to bits is done by userspace. + + Unlike connmark, more than 32 flag bits may be assigned to a + connection simultaneously. + config NETFILTER_XT_MATCH_CONNLIMIT tristate '"connlimit" match support"' depends on NF_CONNTRACK diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 32596978df1d..b3bbda60945e 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -4,6 +4,7 @@ nf_conntrack-y := nf_conntrack_core.o nf_conntrack_standalone.o nf_conntrack_exp nf_conntrack-$(CONFIG_NF_CONNTRACK_TIMEOUT) += nf_conntrack_timeout.o nf_conntrack-$(CONFIG_NF_CONNTRACK_TIMESTAMP) += nf_conntrack_timestamp.o nf_conntrack-$(CONFIG_NF_CONNTRACK_EVENTS) += nf_conntrack_ecache.o +nf_conntrack-$(CONFIG_NF_CONNTRACK_LABELS) += nf_conntrack_labels.o obj-$(CONFIG_NETFILTER) = netfilter.o @@ -101,6 +102,7 @@ obj-$(CONFIG_NETFILTER_XT_MATCH_ADDRTYPE) += xt_addrtype.o obj-$(CONFIG_NETFILTER_XT_MATCH_CLUSTER) += xt_cluster.o obj-$(CONFIG_NETFILTER_XT_MATCH_COMMENT) += xt_comment.o obj-$(CONFIG_NETFILTER_XT_MATCH_CONNBYTES) += xt_connbytes.o +obj-$(CONFIG_NETFILTER_XT_MATCH_CONNLABEL) += xt_connlabel.o obj-$(CONFIG_NETFILTER_XT_MATCH_CONNLIMIT) += xt_connlimit.o obj-$(CONFIG_NETFILTER_XT_MATCH_CONNTRACK) += xt_conntrack.o obj-$(CONFIG_NETFILTER_XT_MATCH_CPU) += xt_cpu.o diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index e4a0c4fb3a7c..85aa4b7149c5 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -45,6 +45,7 @@ #include #include #include +#include #include #include @@ -763,6 +764,7 @@ void nf_conntrack_free(struct nf_conn *ct) } EXPORT_SYMBOL_GPL(nf_conntrack_free); + /* Allocate a new conntrack: we return -ENOMEM if classification failed due to stress. Otherwise it really is unclassifiable. */ static struct nf_conntrack_tuple_hash * @@ -809,6 +811,7 @@ init_conntrack(struct net *net, struct nf_conn *tmpl, nf_ct_acct_ext_add(ct, GFP_ATOMIC); nf_ct_tstamp_ext_add(ct, GFP_ATOMIC); + nf_ct_labels_ext_add(ct); ecache = tmpl ? nf_ct_ecache_find(tmpl) : NULL; nf_ct_ecache_ext_add(ct, ecache ? ecache->ctmask : 0, @@ -1352,6 +1355,7 @@ static void nf_conntrack_cleanup_net(struct net *net) } nf_ct_free_hashtable(net->ct.hash, net->ct.htable_size); + nf_conntrack_labels_fini(net); nf_conntrack_helper_fini(net); nf_conntrack_timeout_fini(net); nf_conntrack_ecache_fini(net); @@ -1583,7 +1587,15 @@ static int nf_conntrack_init_net(struct net *net) ret = nf_conntrack_helper_init(net); if (ret < 0) goto err_helper; + + ret = nf_conntrack_labels_init(net); + if (ret < 0) + goto err_labels; + return 0; + +err_labels: + nf_conntrack_helper_fini(net); err_helper: nf_conntrack_timeout_fini(net); err_timeout: diff --git a/net/netfilter/nf_conntrack_labels.c b/net/netfilter/nf_conntrack_labels.c new file mode 100644 index 000000000000..0c542f41f338 --- /dev/null +++ b/net/netfilter/nf_conntrack_labels.c @@ -0,0 +1,72 @@ +/* + * test/set flag bits stored in conntrack extension area. + * + * (C) 2013 Astaro GmbH & Co KG + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include + +#include +#include + +static unsigned int label_bits(const struct nf_conn_labels *l) +{ + unsigned int longs = l->words; + return longs * BITS_PER_LONG; +} + +bool nf_connlabel_match(const struct nf_conn *ct, u16 bit) +{ + struct nf_conn_labels *labels = nf_ct_labels_find(ct); + + if (!labels) + return false; + + return bit < label_bits(labels) && test_bit(bit, labels->bits); +} +EXPORT_SYMBOL_GPL(nf_connlabel_match); + +int nf_connlabel_set(struct nf_conn *ct, u16 bit) +{ + struct nf_conn_labels *labels = nf_ct_labels_find(ct); + + if (!labels || bit >= label_bits(labels)) + return -ENOSPC; + + if (test_bit(bit, labels->bits)) + return 0; + + if (test_and_set_bit(bit, labels->bits)) + return 0; + + return 0; +} +EXPORT_SYMBOL_GPL(nf_connlabel_set); + +static struct nf_ct_ext_type labels_extend __read_mostly = { + .len = sizeof(struct nf_conn_labels), + .align = __alignof__(struct nf_conn_labels), + .id = NF_CT_EXT_LABELS, +}; + +int nf_conntrack_labels_init(struct net *net) +{ + if (net_eq(net, &init_net)) + return nf_ct_extend_register(&labels_extend); + return 0; +} + +void nf_conntrack_labels_fini(struct net *net) +{ + if (net_eq(net, &init_net)) + nf_ct_extend_unregister(&labels_extend); +} diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 627b0e50b238..e0b10ee180ef 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -43,6 +43,7 @@ #include #include #include +#include #ifdef CONFIG_NF_NAT_NEEDED #include #include @@ -1598,6 +1599,8 @@ ctnetlink_create_conntrack(struct net *net, u16 zone, nf_ct_acct_ext_add(ct, GFP_ATOMIC); nf_ct_tstamp_ext_add(ct, GFP_ATOMIC); nf_ct_ecache_ext_add(ct, 0, 0, GFP_ATOMIC); + nf_ct_labels_ext_add(ct); + /* we must add conntrack extensions before confirmation. */ ct->status |= IPS_CONFIRMED; diff --git a/net/netfilter/xt_connlabel.c b/net/netfilter/xt_connlabel.c new file mode 100644 index 000000000000..9f8719df2001 --- /dev/null +++ b/net/netfilter/xt_connlabel.c @@ -0,0 +1,99 @@ +/* + * (C) 2013 Astaro GmbH & Co KG + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Florian Westphal "); +MODULE_DESCRIPTION("Xtables: add/match connection trackling labels"); +MODULE_ALIAS("ipt_connlabel"); +MODULE_ALIAS("ip6t_connlabel"); + +static bool +connlabel_mt(const struct sk_buff *skb, struct xt_action_param *par) +{ + const struct xt_connlabel_mtinfo *info = par->matchinfo; + enum ip_conntrack_info ctinfo; + struct nf_conn *ct; + bool invert = info->options & XT_CONNLABEL_OP_INVERT; + + ct = nf_ct_get(skb, &ctinfo); + if (ct == NULL || nf_ct_is_untracked(ct)) + return invert; + + if (info->options & XT_CONNLABEL_OP_SET) + return (nf_connlabel_set(ct, info->bit) == 0) ^ invert; + + return nf_connlabel_match(ct, info->bit) ^ invert; +} + +static int connlabel_mt_check(const struct xt_mtchk_param *par) +{ + const int options = XT_CONNLABEL_OP_INVERT | + XT_CONNLABEL_OP_SET; + struct xt_connlabel_mtinfo *info = par->matchinfo; + int ret; + size_t words; + + if (info->bit > XT_CONNLABEL_MAXBIT) + return -ERANGE; + + if (info->options & ~options) { + pr_err("Unknown options in mask %x\n", info->options); + return -EINVAL; + } + + ret = nf_ct_l3proto_try_module_get(par->family); + if (ret < 0) { + pr_info("cannot load conntrack support for proto=%u\n", + par->family); + return ret; + } + + par->net->ct.labels_used++; + words = BITS_TO_LONGS(info->bit+1); + if (words > par->net->ct.label_words) + par->net->ct.label_words = words; + + return ret; +} + +static void connlabel_mt_destroy(const struct xt_mtdtor_param *par) +{ + par->net->ct.labels_used--; + if (par->net->ct.labels_used == 0) + par->net->ct.label_words = 0; + nf_ct_l3proto_module_put(par->family); +} + +static struct xt_match connlabels_mt_reg __read_mostly = { + .name = "connlabel", + .family = NFPROTO_UNSPEC, + .checkentry = connlabel_mt_check, + .match = connlabel_mt, + .matchsize = sizeof(struct xt_connlabel_mtinfo), + .destroy = connlabel_mt_destroy, + .me = THIS_MODULE, +}; + +static int __init connlabel_mt_init(void) +{ + return xt_register_match(&connlabels_mt_reg); +} + +static void __exit connlabel_mt_exit(void) +{ + xt_unregister_match(&connlabels_mt_reg); +} + +module_init(connlabel_mt_init); +module_exit(connlabel_mt_exit); -- cgit v1.2.3 From 0ceabd83875b72a29f33db4ab703d6ba40ea4c58 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 11 Jan 2013 06:30:45 +0000 Subject: netfilter: ctnetlink: deliver labels to userspace Introduce CTA_LABELS attribute to send a bit-vector of currently active labels to userspace. Future patch will permit userspace to also set/delete active labels. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_conntrack_common.h | 1 + include/uapi/linux/netfilter/nfnetlink_conntrack.h | 1 + net/netfilter/nf_conntrack_labels.c | 2 +- net/netfilter/nf_conntrack_netlink.c | 41 ++++++++++++++++++++++ 4 files changed, 44 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netfilter/nf_conntrack_common.h b/include/uapi/linux/netfilter/nf_conntrack_common.h index 1644cdd8be91..d69483fb3825 100644 --- a/include/uapi/linux/netfilter/nf_conntrack_common.h +++ b/include/uapi/linux/netfilter/nf_conntrack_common.h @@ -101,6 +101,7 @@ enum ip_conntrack_events { IPCT_MARK, /* new mark has been set */ IPCT_NATSEQADJ, /* NAT is doing sequence adjustment */ IPCT_SECMARK, /* new security mark has been set */ + IPCT_LABEL, /* new connlabel has been set */ }; enum ip_conntrack_expect_events { diff --git a/include/uapi/linux/netfilter/nfnetlink_conntrack.h b/include/uapi/linux/netfilter/nfnetlink_conntrack.h index 86e930cf3dfb..9e71e0c081fd 100644 --- a/include/uapi/linux/netfilter/nfnetlink_conntrack.h +++ b/include/uapi/linux/netfilter/nfnetlink_conntrack.h @@ -49,6 +49,7 @@ enum ctattr_type { CTA_SECCTX, CTA_TIMESTAMP, CTA_MARK_MASK, + CTA_LABELS, __CTA_MAX }; #define CTA_MAX (__CTA_MAX - 1) diff --git a/net/netfilter/nf_conntrack_labels.c b/net/netfilter/nf_conntrack_labels.c index 0c542f41f338..ac5d0807d681 100644 --- a/net/netfilter/nf_conntrack_labels.c +++ b/net/netfilter/nf_conntrack_labels.c @@ -46,7 +46,7 @@ int nf_connlabel_set(struct nf_conn *ct, u16 bit) return 0; if (test_and_set_bit(bit, labels->bits)) - return 0; + nf_conntrack_event_cache(IPCT_LABEL, ct); return 0; } diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index e0b10ee180ef..5f5386382f13 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -324,6 +324,40 @@ nla_put_failure: #define ctnetlink_dump_secctx(a, b) (0) #endif +#ifdef CONFIG_NF_CONNTRACK_LABELS +static int ctnetlink_label_size(const struct nf_conn *ct) +{ + struct nf_conn_labels *labels = nf_ct_labels_find(ct); + + if (!labels) + return 0; + return nla_total_size(labels->words * sizeof(long)); +} + +static int +ctnetlink_dump_labels(struct sk_buff *skb, const struct nf_conn *ct) +{ + struct nf_conn_labels *labels = nf_ct_labels_find(ct); + unsigned int len, i; + + if (!labels) + return 0; + + len = labels->words * sizeof(long); + i = 0; + do { + if (labels->bits[i] != 0) + return nla_put(skb, CTA_LABELS, len, labels->bits); + i++; + } while (i < labels->words); + + return 0; +} +#else +#define ctnetlink_dump_labels(a, b) (0) +#define ctnetlink_label_size(a) (0) +#endif + #define master_tuple(ct) &(ct->master->tuplehash[IP_CT_DIR_ORIGINAL].tuple) static inline int @@ -464,6 +498,7 @@ ctnetlink_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type, ctnetlink_dump_helpinfo(skb, ct) < 0 || ctnetlink_dump_mark(skb, ct) < 0 || ctnetlink_dump_secctx(skb, ct) < 0 || + ctnetlink_dump_labels(skb, ct) < 0 || ctnetlink_dump_id(skb, ct) < 0 || ctnetlink_dump_use(skb, ct) < 0 || ctnetlink_dump_master(skb, ct) < 0 || @@ -562,6 +597,7 @@ ctnetlink_nlmsg_size(const struct nf_conn *ct) + nla_total_size(sizeof(u_int32_t)) /* CTA_MARK */ #endif + ctnetlink_proto_size(ct) + + ctnetlink_label_size(ct) ; } @@ -663,6 +699,9 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item) && ctnetlink_dump_secctx(skb, ct) < 0) goto nla_put_failure; #endif + if (events & (1 << IPCT_LABEL) && + ctnetlink_dump_labels(skb, ct) < 0) + goto nla_put_failure; if (events & (1 << IPCT_RELATED) && ctnetlink_dump_master(skb, ct) < 0) @@ -1986,6 +2025,8 @@ ctnetlink_nfqueue_build(struct sk_buff *skb, struct nf_conn *ct) if (ct->mark && ctnetlink_dump_mark(skb, ct) < 0) goto nla_put_failure; #endif + if (ctnetlink_dump_labels(skb, ct) < 0) + goto nla_put_failure; rcu_read_unlock(); return 0; -- cgit v1.2.3 From 9b21f6a90924dfe8e5e686c314ddb441fb06501e Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 11 Jan 2013 06:30:46 +0000 Subject: netfilter: ctnetlink: allow userspace to modify labels Add the ability to set/clear labels assigned to a conntrack via ctnetlink. To allow userspace to only alter specific bits, Pablo suggested to add a new CTA_LABELS_MASK attribute: The new set of active labels is then determined via active = (active & ~mask) ^ changeset i.e., the mask selects those bits in the existing set that should be changed. This follows the same method already used by MARK and CONNMARK targets. Omitting CTA_LABELS_MASK is the same as setting all bits in CTA_LABELS_MASK to 1: The existing set is replaced by the one from userspace. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_labels.h | 3 ++ include/uapi/linux/netfilter/nfnetlink_conntrack.h | 1 + net/netfilter/nf_conntrack_labels.c | 43 +++++++++++++++++++++ net/netfilter/nf_conntrack_netlink.c | 44 ++++++++++++++++++++++ 4 files changed, 91 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/net/netfilter/nf_conntrack_labels.h b/include/net/netfilter/nf_conntrack_labels.h index b94fe31c7b39..a3ce5d076fca 100644 --- a/include/net/netfilter/nf_conntrack_labels.h +++ b/include/net/netfilter/nf_conntrack_labels.h @@ -46,6 +46,9 @@ static inline struct nf_conn_labels *nf_ct_labels_ext_add(struct nf_conn *ct) bool nf_connlabel_match(const struct nf_conn *ct, u16 bit); int nf_connlabel_set(struct nf_conn *ct, u16 bit); +int nf_connlabels_replace(struct nf_conn *ct, + const u32 *data, const u32 *mask, unsigned int words); + #ifdef CONFIG_NF_CONNTRACK_LABELS int nf_conntrack_labels_init(struct net *net); void nf_conntrack_labels_fini(struct net *net); diff --git a/include/uapi/linux/netfilter/nfnetlink_conntrack.h b/include/uapi/linux/netfilter/nfnetlink_conntrack.h index 9e71e0c081fd..08fabc6c93f3 100644 --- a/include/uapi/linux/netfilter/nfnetlink_conntrack.h +++ b/include/uapi/linux/netfilter/nfnetlink_conntrack.h @@ -50,6 +50,7 @@ enum ctattr_type { CTA_TIMESTAMP, CTA_MARK_MASK, CTA_LABELS, + CTA_LABELS_MASK, __CTA_MAX }; #define CTA_MAX (__CTA_MAX - 1) diff --git a/net/netfilter/nf_conntrack_labels.c b/net/netfilter/nf_conntrack_labels.c index ac5d0807d681..e1d1eb850e7f 100644 --- a/net/netfilter/nf_conntrack_labels.c +++ b/net/netfilter/nf_conntrack_labels.c @@ -52,6 +52,49 @@ int nf_connlabel_set(struct nf_conn *ct, u16 bit) } EXPORT_SYMBOL_GPL(nf_connlabel_set); +#if IS_ENABLED(CONFIG_NF_CT_NETLINK) +static void replace_u32(u32 *address, u32 mask, u32 new) +{ + u32 old, tmp; + + do { + old = *address; + tmp = (old & mask) ^ new; + } while (cmpxchg(address, old, tmp) != old); +} + +int nf_connlabels_replace(struct nf_conn *ct, + const u32 *data, + const u32 *mask, unsigned int words32) +{ + struct nf_conn_labels *labels; + unsigned int size, i; + u32 *dst; + + labels = nf_ct_labels_find(ct); + if (!labels) + return -ENOSPC; + + size = labels->words * sizeof(long); + if (size < (words32 * sizeof(u32))) + words32 = size / sizeof(u32); + + dst = (u32 *) labels->bits; + if (words32) { + for (i = 0; i < words32; i++) + replace_u32(&dst[i], mask ? ~mask[i] : 0, data[i]); + } + + size /= sizeof(u32); + for (i = words32; i < size; i++) /* pad */ + replace_u32(&dst[i], 0, 0); + + nf_conntrack_event_cache(IPCT_LABEL, ct); + return 0; +} +EXPORT_SYMBOL_GPL(nf_connlabels_replace); +#endif + static struct nf_ct_ext_type labels_extend __read_mostly = { .len = sizeof(struct nf_conn_labels), .align = __alignof__(struct nf_conn_labels), diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 5f5386382f13..2334cc5d2b16 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -961,6 +961,7 @@ ctnetlink_parse_help(const struct nlattr *attr, char **helper_name, return 0; } +#define __CTA_LABELS_MAX_LENGTH ((XT_CONNLABEL_MAXBIT + 1) / BITS_PER_BYTE) static const struct nla_policy ct_nla_policy[CTA_MAX+1] = { [CTA_TUPLE_ORIG] = { .type = NLA_NESTED }, [CTA_TUPLE_REPLY] = { .type = NLA_NESTED }, @@ -977,6 +978,10 @@ static const struct nla_policy ct_nla_policy[CTA_MAX+1] = { [CTA_NAT_SEQ_ADJ_REPLY] = { .type = NLA_NESTED }, [CTA_ZONE] = { .type = NLA_U16 }, [CTA_MARK_MASK] = { .type = NLA_U32 }, + [CTA_LABELS] = { .type = NLA_BINARY, + .len = __CTA_LABELS_MAX_LENGTH }, + [CTA_LABELS_MASK] = { .type = NLA_BINARY, + .len = __CTA_LABELS_MAX_LENGTH }, }; static int @@ -1504,6 +1509,31 @@ ctnetlink_change_nat_seq_adj(struct nf_conn *ct, } #endif +static int +ctnetlink_attach_labels(struct nf_conn *ct, const struct nlattr * const cda[]) +{ +#ifdef CONFIG_NF_CONNTRACK_LABELS + size_t len = nla_len(cda[CTA_LABELS]); + const void *mask = cda[CTA_LABELS_MASK]; + + if (len & (sizeof(u32)-1)) /* must be multiple of u32 */ + return -EINVAL; + + if (mask) { + if (nla_len(cda[CTA_LABELS_MASK]) == 0 || + nla_len(cda[CTA_LABELS_MASK]) != len) + return -EINVAL; + mask = nla_data(cda[CTA_LABELS_MASK]); + } + + len /= sizeof(u32); + + return nf_connlabels_replace(ct, nla_data(cda[CTA_LABELS]), mask, len); +#else + return -EOPNOTSUPP; +#endif +} + static int ctnetlink_change_conntrack(struct nf_conn *ct, const struct nlattr * const cda[]) @@ -1550,6 +1580,11 @@ ctnetlink_change_conntrack(struct nf_conn *ct, return err; } #endif + if (cda[CTA_LABELS]) { + err = ctnetlink_attach_labels(ct, cda); + if (err < 0) + return err; + } return 0; } @@ -1758,6 +1793,10 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb, else events = IPCT_NEW; + if (cda[CTA_LABELS] && + ctnetlink_attach_labels(ct, cda) == 0) + events |= (1 << IPCT_LABEL); + nf_conntrack_eventmask_report((1 << IPCT_REPLY) | (1 << IPCT_ASSURED) | (1 << IPCT_HELPER) | @@ -2055,6 +2094,11 @@ ctnetlink_nfqueue_parse_ct(const struct nlattr *cda[], struct nf_conn *ct) if (err < 0) return err; } + if (cda[CTA_LABELS]) { + err = ctnetlink_attach_labels(ct, cda); + if (err < 0) + return err; + } #if defined(CONFIG_NF_CONNTRACK_MARK) if (cda[CTA_MARK]) ct->mark = ntohl(nla_get_be32(cda[CTA_MARK])); -- cgit v1.2.3 From 7d9f49afa451d8565d00a5cea39acf9bb26feb50 Mon Sep 17 00:00:00 2001 From: Kevin Cernekee Date: Wed, 16 Jan 2013 20:28:40 -0800 Subject: serial: rp2: New driver for Comtrol RocketPort 2 cards This driver supports the RocketPort EXPRESS and RocketPort INFINITY families of PCI/PCIe multiport serial adapters. These adapters use a "RocketPort 2" ASIC that is not compatible with the original RocketPort driver (CONFIG_ROCKETPORT). Tested with the RocketPort EXPRESS Octa DB9 and Quad DB9. Also added an old RocketPort 8J PCI card to the same system to verify that rocket.c and rp2.c coexist peacefully. Signed-off-by: Kevin Cernekee Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/Kconfig | 24 ++ drivers/tty/serial/Makefile | 1 + drivers/tty/serial/rp2.c | 885 +++++++++++++++++++++++++++++++++++++++ include/uapi/linux/serial_core.h | 3 + 4 files changed, 913 insertions(+) create mode 100644 drivers/tty/serial/rp2.c (limited to 'include/uapi/linux') diff --git a/drivers/tty/serial/Kconfig b/drivers/tty/serial/Kconfig index aff3cd356662..2dc429357fe3 100644 --- a/drivers/tty/serial/Kconfig +++ b/drivers/tty/serial/Kconfig @@ -1458,4 +1458,28 @@ config SERIAL_ARC_NR_PORTS Set this to the number of serial ports you want the driver to support. +config SERIAL_RP2 + tristate "Comtrol RocketPort EXPRESS/INFINITY support" + depends on PCI + select SERIAL_CORE + help + This driver supports the Comtrol RocketPort EXPRESS and + RocketPort INFINITY families of PCI/PCIe multiport serial adapters. + These adapters use a "RocketPort 2" ASIC that is not compatible + with the original RocketPort driver (CONFIG_ROCKETPORT). + + To compile this driver as a module, choose M here: the + module will be called rp2. + + If you want to compile this driver into the kernel, say Y here. If + you don't have a suitable RocketPort card installed, say N. + +config SERIAL_RP2_NR_UARTS + int "Maximum number of RocketPort EXPRESS/INFINITY ports" + depends on SERIAL_RP2 + default "32" + help + If multiple cards are present, the default limit of 32 ports may + need to be increased. + endmenu diff --git a/drivers/tty/serial/Makefile b/drivers/tty/serial/Makefile index 82e4306bf962..eedfec40e3dd 100644 --- a/drivers/tty/serial/Makefile +++ b/drivers/tty/serial/Makefile @@ -84,3 +84,4 @@ obj-$(CONFIG_SERIAL_TEGRA) += serial-tegra.o obj-$(CONFIG_SERIAL_AR933X) += ar933x_uart.o obj-$(CONFIG_SERIAL_EFM32_UART) += efm32-uart.o obj-$(CONFIG_SERIAL_ARC) += arc_uart.o +obj-$(CONFIG_SERIAL_RP2) += rp2.o diff --git a/drivers/tty/serial/rp2.c b/drivers/tty/serial/rp2.c new file mode 100644 index 000000000000..a314a943f124 --- /dev/null +++ b/drivers/tty/serial/rp2.c @@ -0,0 +1,885 @@ +/* + * Driver for Comtrol RocketPort EXPRESS/INFINITY cards + * + * Copyright (C) 2012 Kevin Cernekee + * + * Inspired by, and loosely based on: + * + * ar933x_uart.c + * Copyright (C) 2011 Gabor Juhos + * + * rocketport_infinity_express-linux-1.20.tar.gz + * Copyright (C) 2004-2011 Comtrol, Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define DRV_NAME "rp2" + +#define RP2_FW_NAME "rp2.fw" +#define RP2_UCODE_BYTES 0x3f + +#define PORTS_PER_ASIC 16 +#define ALL_PORTS_MASK (BIT(PORTS_PER_ASIC) - 1) + +#define UART_CLOCK 44236800 +#define DEFAULT_BAUD_DIV (UART_CLOCK / (9600 * 16)) +#define FIFO_SIZE 512 + +/* BAR0 registers */ +#define RP2_FPGA_CTL0 0x110 +#define RP2_FPGA_CTL1 0x11c +#define RP2_IRQ_MASK 0x1ec +#define RP2_IRQ_MASK_EN_m BIT(0) +#define RP2_IRQ_STATUS 0x1f0 + +/* BAR1 registers */ +#define RP2_ASIC_SPACING 0x1000 +#define RP2_ASIC_OFFSET(i) ((i) << ilog2(RP2_ASIC_SPACING)) + +#define RP2_PORT_BASE 0x000 +#define RP2_PORT_SPACING 0x040 + +#define RP2_UCODE_BASE 0x400 +#define RP2_UCODE_SPACING 0x80 + +#define RP2_CLK_PRESCALER 0xc00 +#define RP2_CH_IRQ_STAT 0xc04 +#define RP2_CH_IRQ_MASK 0xc08 +#define RP2_ASIC_IRQ 0xd00 +#define RP2_ASIC_IRQ_EN_m BIT(20) +#define RP2_GLOBAL_CMD 0xd0c +#define RP2_ASIC_CFG 0xd04 + +/* port registers */ +#define RP2_DATA_DWORD 0x000 + +#define RP2_DATA_BYTE 0x008 +#define RP2_DATA_BYTE_ERR_PARITY_m BIT(8) +#define RP2_DATA_BYTE_ERR_OVERRUN_m BIT(9) +#define RP2_DATA_BYTE_ERR_FRAMING_m BIT(10) +#define RP2_DATA_BYTE_BREAK_m BIT(11) + +/* This lets uart_insert_char() drop bytes received on a !CREAD port */ +#define RP2_DUMMY_READ BIT(16) + +#define RP2_DATA_BYTE_EXCEPTION_MASK (RP2_DATA_BYTE_ERR_PARITY_m | \ + RP2_DATA_BYTE_ERR_OVERRUN_m | \ + RP2_DATA_BYTE_ERR_FRAMING_m | \ + RP2_DATA_BYTE_BREAK_m) + +#define RP2_RX_FIFO_COUNT 0x00c +#define RP2_TX_FIFO_COUNT 0x00e + +#define RP2_CHAN_STAT 0x010 +#define RP2_CHAN_STAT_RXDATA_m BIT(0) +#define RP2_CHAN_STAT_DCD_m BIT(3) +#define RP2_CHAN_STAT_DSR_m BIT(4) +#define RP2_CHAN_STAT_CTS_m BIT(5) +#define RP2_CHAN_STAT_RI_m BIT(6) +#define RP2_CHAN_STAT_OVERRUN_m BIT(13) +#define RP2_CHAN_STAT_DSR_CHANGED_m BIT(16) +#define RP2_CHAN_STAT_CTS_CHANGED_m BIT(17) +#define RP2_CHAN_STAT_CD_CHANGED_m BIT(18) +#define RP2_CHAN_STAT_RI_CHANGED_m BIT(22) +#define RP2_CHAN_STAT_TXEMPTY_m BIT(25) + +#define RP2_CHAN_STAT_MS_CHANGED_MASK (RP2_CHAN_STAT_DSR_CHANGED_m | \ + RP2_CHAN_STAT_CTS_CHANGED_m | \ + RP2_CHAN_STAT_CD_CHANGED_m | \ + RP2_CHAN_STAT_RI_CHANGED_m) + +#define RP2_TXRX_CTL 0x014 +#define RP2_TXRX_CTL_MSRIRQ_m BIT(0) +#define RP2_TXRX_CTL_RXIRQ_m BIT(2) +#define RP2_TXRX_CTL_RX_TRIG_s 3 +#define RP2_TXRX_CTL_RX_TRIG_m (0x3 << RP2_TXRX_CTL_RX_TRIG_s) +#define RP2_TXRX_CTL_RX_TRIG_1 (0x1 << RP2_TXRX_CTL_RX_TRIG_s) +#define RP2_TXRX_CTL_RX_TRIG_256 (0x2 << RP2_TXRX_CTL_RX_TRIG_s) +#define RP2_TXRX_CTL_RX_TRIG_448 (0x3 << RP2_TXRX_CTL_RX_TRIG_s) +#define RP2_TXRX_CTL_RX_EN_m BIT(5) +#define RP2_TXRX_CTL_RTSFLOW_m BIT(6) +#define RP2_TXRX_CTL_DTRFLOW_m BIT(7) +#define RP2_TXRX_CTL_TX_TRIG_s 16 +#define RP2_TXRX_CTL_TX_TRIG_m (0x3 << RP2_TXRX_CTL_RX_TRIG_s) +#define RP2_TXRX_CTL_DSRFLOW_m BIT(18) +#define RP2_TXRX_CTL_TXIRQ_m BIT(19) +#define RP2_TXRX_CTL_CTSFLOW_m BIT(23) +#define RP2_TXRX_CTL_TX_EN_m BIT(24) +#define RP2_TXRX_CTL_RTS_m BIT(25) +#define RP2_TXRX_CTL_DTR_m BIT(26) +#define RP2_TXRX_CTL_LOOP_m BIT(27) +#define RP2_TXRX_CTL_BREAK_m BIT(28) +#define RP2_TXRX_CTL_CMSPAR_m BIT(29) +#define RP2_TXRX_CTL_nPARODD_m BIT(30) +#define RP2_TXRX_CTL_PARENB_m BIT(31) + +#define RP2_UART_CTL 0x018 +#define RP2_UART_CTL_MODE_s 0 +#define RP2_UART_CTL_MODE_m (0x7 << RP2_UART_CTL_MODE_s) +#define RP2_UART_CTL_MODE_rs232 (0x1 << RP2_UART_CTL_MODE_s) +#define RP2_UART_CTL_FLUSH_RX_m BIT(3) +#define RP2_UART_CTL_FLUSH_TX_m BIT(4) +#define RP2_UART_CTL_RESET_CH_m BIT(5) +#define RP2_UART_CTL_XMIT_EN_m BIT(6) +#define RP2_UART_CTL_DATABITS_s 8 +#define RP2_UART_CTL_DATABITS_m (0x3 << RP2_UART_CTL_DATABITS_s) +#define RP2_UART_CTL_DATABITS_8 (0x3 << RP2_UART_CTL_DATABITS_s) +#define RP2_UART_CTL_DATABITS_7 (0x2 << RP2_UART_CTL_DATABITS_s) +#define RP2_UART_CTL_DATABITS_6 (0x1 << RP2_UART_CTL_DATABITS_s) +#define RP2_UART_CTL_DATABITS_5 (0x0 << RP2_UART_CTL_DATABITS_s) +#define RP2_UART_CTL_STOPBITS_m BIT(10) + +#define RP2_BAUD 0x01c + +/* ucode registers */ +#define RP2_TX_SWFLOW 0x02 +#define RP2_TX_SWFLOW_ena 0x81 +#define RP2_TX_SWFLOW_dis 0x9d + +#define RP2_RX_SWFLOW 0x0c +#define RP2_RX_SWFLOW_ena 0x81 +#define RP2_RX_SWFLOW_dis 0x8d + +#define RP2_RX_FIFO 0x37 +#define RP2_RX_FIFO_ena 0x08 +#define RP2_RX_FIFO_dis 0x81 + +static struct uart_driver rp2_uart_driver = { + .owner = THIS_MODULE, + .driver_name = DRV_NAME, + .dev_name = "ttyRP", + .nr = CONFIG_SERIAL_RP2_NR_UARTS, +}; + +struct rp2_card; + +struct rp2_uart_port { + struct uart_port port; + int idx; + int ignore_rx; + struct rp2_card *card; + void __iomem *asic_base; + void __iomem *base; + void __iomem *ucode; +}; + +struct rp2_card { + struct pci_dev *pdev; + struct rp2_uart_port *ports; + int n_ports; + int initialized_ports; + int minor_start; + int smpte; + void __iomem *bar0; + void __iomem *bar1; + spinlock_t card_lock; + struct completion fw_loaded; +}; + +#define RP_ID(prod) PCI_VDEVICE(RP, (prod)) +#define RP_CAP(ports, smpte) (((ports) << 8) | ((smpte) << 0)) + +static inline void rp2_decode_cap(const struct pci_device_id *id, + int *ports, int *smpte) +{ + *ports = id->driver_data >> 8; + *smpte = id->driver_data & 0xff; +} + +static DEFINE_SPINLOCK(rp2_minor_lock); +static int rp2_minor_next; + +static int rp2_alloc_ports(int n_ports) +{ + int ret = -ENOSPC; + + spin_lock(&rp2_minor_lock); + if (rp2_minor_next + n_ports <= CONFIG_SERIAL_RP2_NR_UARTS) { + /* sorry, no support for hot unplugging individual cards */ + ret = rp2_minor_next; + rp2_minor_next += n_ports; + } + spin_unlock(&rp2_minor_lock); + + return ret; +} + +static inline struct rp2_uart_port *port_to_up(struct uart_port *port) +{ + return container_of(port, struct rp2_uart_port, port); +} + +static void rp2_rmw(struct rp2_uart_port *up, int reg, + u32 clr_bits, u32 set_bits) +{ + u32 tmp = readl(up->base + reg); + tmp &= ~clr_bits; + tmp |= set_bits; + writel(tmp, up->base + reg); +} + +static void rp2_rmw_clr(struct rp2_uart_port *up, int reg, u32 val) +{ + rp2_rmw(up, reg, val, 0); +} + +static void rp2_rmw_set(struct rp2_uart_port *up, int reg, u32 val) +{ + rp2_rmw(up, reg, 0, val); +} + +static void rp2_mask_ch_irq(struct rp2_uart_port *up, int ch_num, + int is_enabled) +{ + unsigned long flags, irq_mask; + + spin_lock_irqsave(&up->card->card_lock, flags); + + irq_mask = readl(up->asic_base + RP2_CH_IRQ_MASK); + if (is_enabled) + irq_mask &= ~BIT(ch_num); + else + irq_mask |= BIT(ch_num); + writel(irq_mask, up->asic_base + RP2_CH_IRQ_MASK); + + spin_unlock_irqrestore(&up->card->card_lock, flags); +} + +static unsigned int rp2_uart_tx_empty(struct uart_port *port) +{ + struct rp2_uart_port *up = port_to_up(port); + unsigned long tx_fifo_bytes, flags; + + /* + * This should probably check the transmitter, not the FIFO. + * But the TXEMPTY bit doesn't seem to work unless the TX IRQ is + * enabled. + */ + spin_lock_irqsave(&up->port.lock, flags); + tx_fifo_bytes = readw(up->base + RP2_TX_FIFO_COUNT); + spin_unlock_irqrestore(&up->port.lock, flags); + + return tx_fifo_bytes ? 0 : TIOCSER_TEMT; +} + +static unsigned int rp2_uart_get_mctrl(struct uart_port *port) +{ + struct rp2_uart_port *up = port_to_up(port); + u32 status; + + status = readl(up->base + RP2_CHAN_STAT); + return ((status & RP2_CHAN_STAT_DCD_m) ? TIOCM_CAR : 0) | + ((status & RP2_CHAN_STAT_DSR_m) ? TIOCM_DSR : 0) | + ((status & RP2_CHAN_STAT_CTS_m) ? TIOCM_CTS : 0) | + ((status & RP2_CHAN_STAT_RI_m) ? TIOCM_RI : 0); +} + +static void rp2_uart_set_mctrl(struct uart_port *port, unsigned int mctrl) +{ + rp2_rmw(port_to_up(port), RP2_TXRX_CTL, + RP2_TXRX_CTL_DTR_m | RP2_TXRX_CTL_RTS_m | RP2_TXRX_CTL_LOOP_m, + ((mctrl & TIOCM_DTR) ? RP2_TXRX_CTL_DTR_m : 0) | + ((mctrl & TIOCM_RTS) ? RP2_TXRX_CTL_RTS_m : 0) | + ((mctrl & TIOCM_LOOP) ? RP2_TXRX_CTL_LOOP_m : 0)); +} + +static void rp2_uart_start_tx(struct uart_port *port) +{ + rp2_rmw_set(port_to_up(port), RP2_TXRX_CTL, RP2_TXRX_CTL_TXIRQ_m); +} + +static void rp2_uart_stop_tx(struct uart_port *port) +{ + rp2_rmw_clr(port_to_up(port), RP2_TXRX_CTL, RP2_TXRX_CTL_TXIRQ_m); +} + +static void rp2_uart_stop_rx(struct uart_port *port) +{ + rp2_rmw_clr(port_to_up(port), RP2_TXRX_CTL, RP2_TXRX_CTL_RXIRQ_m); +} + +static void rp2_uart_break_ctl(struct uart_port *port, int break_state) +{ + unsigned long flags; + + spin_lock_irqsave(&port->lock, flags); + rp2_rmw(port_to_up(port), RP2_TXRX_CTL, RP2_TXRX_CTL_BREAK_m, + break_state ? RP2_TXRX_CTL_BREAK_m : 0); + spin_unlock_irqrestore(&port->lock, flags); +} + +static void rp2_uart_enable_ms(struct uart_port *port) +{ + rp2_rmw_set(port_to_up(port), RP2_TXRX_CTL, RP2_TXRX_CTL_MSRIRQ_m); +} + +static void __rp2_uart_set_termios(struct rp2_uart_port *up, + unsigned long cfl, + unsigned long ifl, + unsigned int baud_div) +{ + /* baud rate divisor (calculated elsewhere). 0 = divide-by-1 */ + writew(baud_div - 1, up->base + RP2_BAUD); + + /* data bits and stop bits */ + rp2_rmw(up, RP2_UART_CTL, + RP2_UART_CTL_STOPBITS_m | RP2_UART_CTL_DATABITS_m, + ((cfl & CSTOPB) ? RP2_UART_CTL_STOPBITS_m : 0) | + (((cfl & CSIZE) == CS8) ? RP2_UART_CTL_DATABITS_8 : 0) | + (((cfl & CSIZE) == CS7) ? RP2_UART_CTL_DATABITS_7 : 0) | + (((cfl & CSIZE) == CS6) ? RP2_UART_CTL_DATABITS_6 : 0) | + (((cfl & CSIZE) == CS5) ? RP2_UART_CTL_DATABITS_5 : 0)); + + /* parity and hardware flow control */ + rp2_rmw(up, RP2_TXRX_CTL, + RP2_TXRX_CTL_PARENB_m | RP2_TXRX_CTL_nPARODD_m | + RP2_TXRX_CTL_CMSPAR_m | RP2_TXRX_CTL_DTRFLOW_m | + RP2_TXRX_CTL_DSRFLOW_m | RP2_TXRX_CTL_RTSFLOW_m | + RP2_TXRX_CTL_CTSFLOW_m, + ((cfl & PARENB) ? RP2_TXRX_CTL_PARENB_m : 0) | + ((cfl & PARODD) ? 0 : RP2_TXRX_CTL_nPARODD_m) | + ((cfl & CMSPAR) ? RP2_TXRX_CTL_CMSPAR_m : 0) | + ((cfl & CRTSCTS) ? (RP2_TXRX_CTL_RTSFLOW_m | + RP2_TXRX_CTL_CTSFLOW_m) : 0)); + + /* XON/XOFF software flow control */ + writeb((ifl & IXON) ? RP2_TX_SWFLOW_ena : RP2_TX_SWFLOW_dis, + up->ucode + RP2_TX_SWFLOW); + writeb((ifl & IXOFF) ? RP2_RX_SWFLOW_ena : RP2_RX_SWFLOW_dis, + up->ucode + RP2_RX_SWFLOW); +} + +static void rp2_uart_set_termios(struct uart_port *port, + struct ktermios *new, + struct ktermios *old) +{ + struct rp2_uart_port *up = port_to_up(port); + unsigned long flags; + unsigned int baud, baud_div; + + baud = uart_get_baud_rate(port, new, old, 0, port->uartclk / 16); + baud_div = uart_get_divisor(port, baud); + + if (tty_termios_baud_rate(new)) + tty_termios_encode_baud_rate(new, baud, baud); + + spin_lock_irqsave(&port->lock, flags); + + /* ignore all characters if CREAD is not set */ + port->ignore_status_mask = (new->c_cflag & CREAD) ? 0 : RP2_DUMMY_READ; + + __rp2_uart_set_termios(up, new->c_cflag, new->c_iflag, baud_div); + uart_update_timeout(port, new->c_cflag, baud); + + spin_unlock_irqrestore(&port->lock, flags); +} + +static void rp2_rx_chars(struct rp2_uart_port *up) +{ + u16 bytes = readw(up->base + RP2_RX_FIFO_COUNT); + struct tty_port *port = &up->port.state->port; + + for (; bytes != 0; bytes--) { + u32 byte = readw(up->base + RP2_DATA_BYTE) | RP2_DUMMY_READ; + char ch = byte & 0xff; + + if (likely(!(byte & RP2_DATA_BYTE_EXCEPTION_MASK))) { + if (!uart_handle_sysrq_char(&up->port, ch)) + uart_insert_char(&up->port, byte, 0, ch, + TTY_NORMAL); + } else { + char flag = TTY_NORMAL; + + if (byte & RP2_DATA_BYTE_BREAK_m) + flag = TTY_BREAK; + else if (byte & RP2_DATA_BYTE_ERR_FRAMING_m) + flag = TTY_FRAME; + else if (byte & RP2_DATA_BYTE_ERR_PARITY_m) + flag = TTY_PARITY; + uart_insert_char(&up->port, byte, + RP2_DATA_BYTE_ERR_OVERRUN_m, ch, flag); + } + up->port.icount.rx++; + } + + tty_flip_buffer_push(port); +} + +static void rp2_tx_chars(struct rp2_uart_port *up) +{ + u16 max_tx = FIFO_SIZE - readw(up->base + RP2_TX_FIFO_COUNT); + struct circ_buf *xmit = &up->port.state->xmit; + + if (uart_tx_stopped(&up->port)) { + rp2_uart_stop_tx(&up->port); + return; + } + + for (; max_tx != 0; max_tx--) { + if (up->port.x_char) { + writeb(up->port.x_char, up->base + RP2_DATA_BYTE); + up->port.x_char = 0; + up->port.icount.tx++; + continue; + } + if (uart_circ_empty(xmit)) { + rp2_uart_stop_tx(&up->port); + break; + } + writeb(xmit->buf[xmit->tail], up->base + RP2_DATA_BYTE); + xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1); + up->port.icount.tx++; + } + + if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS) + uart_write_wakeup(&up->port); +} + +static void rp2_ch_interrupt(struct rp2_uart_port *up) +{ + u32 status; + + spin_lock(&up->port.lock); + + /* + * The IRQ status bits are clear-on-write. Other status bits in + * this register aren't, so it's harmless to write to them. + */ + status = readl(up->base + RP2_CHAN_STAT); + writel(status, up->base + RP2_CHAN_STAT); + + if (status & RP2_CHAN_STAT_RXDATA_m) + rp2_rx_chars(up); + if (status & RP2_CHAN_STAT_TXEMPTY_m) + rp2_tx_chars(up); + if (status & RP2_CHAN_STAT_MS_CHANGED_MASK) + wake_up_interruptible(&up->port.state->port.delta_msr_wait); + + spin_unlock(&up->port.lock); +} + +static int rp2_asic_interrupt(struct rp2_card *card, unsigned int asic_id) +{ + void __iomem *base = card->bar1 + RP2_ASIC_OFFSET(asic_id); + int ch, handled = 0; + unsigned long status = readl(base + RP2_CH_IRQ_STAT) & + ~readl(base + RP2_CH_IRQ_MASK); + + for_each_set_bit(ch, &status, PORTS_PER_ASIC) { + rp2_ch_interrupt(&card->ports[ch]); + handled++; + } + return handled; +} + +static irqreturn_t rp2_uart_interrupt(int irq, void *dev_id) +{ + struct rp2_card *card = dev_id; + int handled; + + handled = rp2_asic_interrupt(card, 0); + if (card->n_ports >= PORTS_PER_ASIC) + handled += rp2_asic_interrupt(card, 1); + + return handled ? IRQ_HANDLED : IRQ_NONE; +} + +static inline void rp2_flush_fifos(struct rp2_uart_port *up) +{ + rp2_rmw_set(up, RP2_UART_CTL, + RP2_UART_CTL_FLUSH_RX_m | RP2_UART_CTL_FLUSH_TX_m); + readl(up->base + RP2_UART_CTL); + udelay(10); + rp2_rmw_clr(up, RP2_UART_CTL, + RP2_UART_CTL_FLUSH_RX_m | RP2_UART_CTL_FLUSH_TX_m); +} + +static int rp2_uart_startup(struct uart_port *port) +{ + struct rp2_uart_port *up = port_to_up(port); + + rp2_flush_fifos(up); + rp2_rmw(up, RP2_TXRX_CTL, RP2_TXRX_CTL_MSRIRQ_m, RP2_TXRX_CTL_RXIRQ_m); + rp2_rmw(up, RP2_TXRX_CTL, RP2_TXRX_CTL_RX_TRIG_m, + RP2_TXRX_CTL_RX_TRIG_1); + rp2_rmw(up, RP2_CHAN_STAT, 0, 0); + rp2_mask_ch_irq(up, up->idx, 1); + + return 0; +} + +static void rp2_uart_shutdown(struct uart_port *port) +{ + struct rp2_uart_port *up = port_to_up(port); + unsigned long flags; + + rp2_uart_break_ctl(port, 0); + + spin_lock_irqsave(&port->lock, flags); + rp2_mask_ch_irq(up, up->idx, 0); + rp2_rmw(up, RP2_CHAN_STAT, 0, 0); + spin_unlock_irqrestore(&port->lock, flags); +} + +static const char *rp2_uart_type(struct uart_port *port) +{ + return (port->type == PORT_RP2) ? "RocketPort 2 UART" : NULL; +} + +static void rp2_uart_release_port(struct uart_port *port) +{ + /* Nothing to release ... */ +} + +static int rp2_uart_request_port(struct uart_port *port) +{ + /* UARTs always present */ + return 0; +} + +static void rp2_uart_config_port(struct uart_port *port, int flags) +{ + if (flags & UART_CONFIG_TYPE) + port->type = PORT_RP2; +} + +static int rp2_uart_verify_port(struct uart_port *port, + struct serial_struct *ser) +{ + if (ser->type != PORT_UNKNOWN && ser->type != PORT_RP2) + return -EINVAL; + + return 0; +} + +static const struct uart_ops rp2_uart_ops = { + .tx_empty = rp2_uart_tx_empty, + .set_mctrl = rp2_uart_set_mctrl, + .get_mctrl = rp2_uart_get_mctrl, + .stop_tx = rp2_uart_stop_tx, + .start_tx = rp2_uart_start_tx, + .stop_rx = rp2_uart_stop_rx, + .enable_ms = rp2_uart_enable_ms, + .break_ctl = rp2_uart_break_ctl, + .startup = rp2_uart_startup, + .shutdown = rp2_uart_shutdown, + .set_termios = rp2_uart_set_termios, + .type = rp2_uart_type, + .release_port = rp2_uart_release_port, + .request_port = rp2_uart_request_port, + .config_port = rp2_uart_config_port, + .verify_port = rp2_uart_verify_port, +}; + +static void rp2_reset_asic(struct rp2_card *card, unsigned int asic_id) +{ + void __iomem *base = card->bar1 + RP2_ASIC_OFFSET(asic_id); + u32 clk_cfg; + + writew(1, base + RP2_GLOBAL_CMD); + readw(base + RP2_GLOBAL_CMD); + msleep(100); + writel(0, base + RP2_CLK_PRESCALER); + + /* TDM clock configuration */ + clk_cfg = readw(base + RP2_ASIC_CFG); + clk_cfg = (clk_cfg & ~BIT(8)) | BIT(9); + writew(clk_cfg, base + RP2_ASIC_CFG); + + /* IRQ routing */ + writel(ALL_PORTS_MASK, base + RP2_CH_IRQ_MASK); + writel(RP2_ASIC_IRQ_EN_m, base + RP2_ASIC_IRQ); +} + +static void rp2_init_card(struct rp2_card *card) +{ + writel(4, card->bar0 + RP2_FPGA_CTL0); + writel(0, card->bar0 + RP2_FPGA_CTL1); + + rp2_reset_asic(card, 0); + if (card->n_ports >= PORTS_PER_ASIC) + rp2_reset_asic(card, 1); + + writel(RP2_IRQ_MASK_EN_m, card->bar0 + RP2_IRQ_MASK); +} + +static void rp2_init_port(struct rp2_uart_port *up, const struct firmware *fw) +{ + int i; + + writel(RP2_UART_CTL_RESET_CH_m, up->base + RP2_UART_CTL); + readl(up->base + RP2_UART_CTL); + udelay(1); + + writel(0, up->base + RP2_TXRX_CTL); + writel(0, up->base + RP2_UART_CTL); + readl(up->base + RP2_UART_CTL); + udelay(1); + + rp2_flush_fifos(up); + + for (i = 0; i < min_t(int, fw->size, RP2_UCODE_BYTES); i++) + writeb(fw->data[i], up->ucode + i); + + __rp2_uart_set_termios(up, CS8 | CREAD | CLOCAL, 0, DEFAULT_BAUD_DIV); + rp2_uart_set_mctrl(&up->port, 0); + + writeb(RP2_RX_FIFO_ena, up->ucode + RP2_RX_FIFO); + rp2_rmw(up, RP2_UART_CTL, RP2_UART_CTL_MODE_m, + RP2_UART_CTL_XMIT_EN_m | RP2_UART_CTL_MODE_rs232); + rp2_rmw_set(up, RP2_TXRX_CTL, + RP2_TXRX_CTL_TX_EN_m | RP2_TXRX_CTL_RX_EN_m); +} + +static void rp2_remove_ports(struct rp2_card *card) +{ + int i; + + for (i = 0; i < card->initialized_ports; i++) + uart_remove_one_port(&rp2_uart_driver, &card->ports[i].port); + card->initialized_ports = 0; +} + +static void rp2_fw_cb(const struct firmware *fw, void *context) +{ + struct rp2_card *card = context; + resource_size_t phys_base; + int i, rc = -ENOENT; + + if (!fw) { + dev_err(&card->pdev->dev, "cannot find '%s' firmware image\n", + RP2_FW_NAME); + goto no_fw; + } + + phys_base = pci_resource_start(card->pdev, 1); + + for (i = 0; i < card->n_ports; i++) { + struct rp2_uart_port *rp = &card->ports[i]; + struct uart_port *p; + int j = (unsigned)i % PORTS_PER_ASIC; + + rp->asic_base = card->bar1; + rp->base = card->bar1 + RP2_PORT_BASE + j*RP2_PORT_SPACING; + rp->ucode = card->bar1 + RP2_UCODE_BASE + j*RP2_UCODE_SPACING; + rp->card = card; + rp->idx = j; + + p = &rp->port; + p->line = card->minor_start + i; + p->dev = &card->pdev->dev; + p->type = PORT_RP2; + p->iotype = UPIO_MEM32; + p->uartclk = UART_CLOCK; + p->regshift = 2; + p->fifosize = FIFO_SIZE; + p->ops = &rp2_uart_ops; + p->irq = card->pdev->irq; + p->membase = rp->base; + p->mapbase = phys_base + RP2_PORT_BASE + j*RP2_PORT_SPACING; + + if (i >= PORTS_PER_ASIC) { + rp->asic_base += RP2_ASIC_SPACING; + rp->base += RP2_ASIC_SPACING; + rp->ucode += RP2_ASIC_SPACING; + p->mapbase += RP2_ASIC_SPACING; + } + + rp2_init_port(rp, fw); + rc = uart_add_one_port(&rp2_uart_driver, p); + if (rc) { + dev_err(&card->pdev->dev, + "error registering port %d: %d\n", i, rc); + rp2_remove_ports(card); + break; + } + card->initialized_ports++; + } + + release_firmware(fw); +no_fw: + /* + * rp2_fw_cb() is called from a workqueue long after rp2_probe() + * has already returned success. So if something failed here, + * we'll just leave the now-dormant device in place until somebody + * unbinds it. + */ + if (rc) + dev_warn(&card->pdev->dev, "driver initialization failed\n"); + + complete(&card->fw_loaded); +} + +static int rp2_probe(struct pci_dev *pdev, + const struct pci_device_id *id) +{ + struct rp2_card *card; + struct rp2_uart_port *ports; + void __iomem * const *bars; + int rc; + + card = devm_kzalloc(&pdev->dev, sizeof(*card), GFP_KERNEL); + if (!card) + return -ENOMEM; + pci_set_drvdata(pdev, card); + spin_lock_init(&card->card_lock); + init_completion(&card->fw_loaded); + + rc = pcim_enable_device(pdev); + if (rc) + return rc; + + rc = pcim_iomap_regions_request_all(pdev, 0x03, DRV_NAME); + if (rc) + return rc; + + bars = pcim_iomap_table(pdev); + card->bar0 = bars[0]; + card->bar1 = bars[1]; + card->pdev = pdev; + + rp2_decode_cap(id, &card->n_ports, &card->smpte); + dev_info(&pdev->dev, "found new card with %d ports\n", card->n_ports); + + card->minor_start = rp2_alloc_ports(card->n_ports); + if (card->minor_start < 0) { + dev_err(&pdev->dev, + "too many ports (try increasing CONFIG_SERIAL_RP2_NR_UARTS)\n"); + return -EINVAL; + } + + rp2_init_card(card); + + ports = devm_kzalloc(&pdev->dev, sizeof(*ports) * card->n_ports, + GFP_KERNEL); + if (!ports) + return -ENOMEM; + card->ports = ports; + + rc = devm_request_irq(&pdev->dev, pdev->irq, rp2_uart_interrupt, + IRQF_SHARED, DRV_NAME, card); + if (rc) + return rc; + + /* + * Only catastrophic errors (e.g. ENOMEM) are reported here. + * If the FW image is missing, we'll find out in rp2_fw_cb() + * and print an error message. + */ + rc = request_firmware_nowait(THIS_MODULE, 1, RP2_FW_NAME, &pdev->dev, + GFP_KERNEL, card, rp2_fw_cb); + if (rc) + return rc; + dev_dbg(&pdev->dev, "waiting for firmware blob...\n"); + + return 0; +} + +static void rp2_remove(struct pci_dev *pdev) +{ + struct rp2_card *card = pci_get_drvdata(pdev); + + wait_for_completion(&card->fw_loaded); + rp2_remove_ports(card); +} + +static DEFINE_PCI_DEVICE_TABLE(rp2_pci_tbl) = { + + /* RocketPort INFINITY cards */ + + { RP_ID(0x0040), RP_CAP(8, 0) }, /* INF Octa, RJ45, selectable */ + { RP_ID(0x0041), RP_CAP(32, 0) }, /* INF 32, ext interface */ + { RP_ID(0x0042), RP_CAP(8, 0) }, /* INF Octa, ext interface */ + { RP_ID(0x0043), RP_CAP(16, 0) }, /* INF 16, ext interface */ + { RP_ID(0x0044), RP_CAP(4, 0) }, /* INF Quad, DB, selectable */ + { RP_ID(0x0045), RP_CAP(8, 0) }, /* INF Octa, DB, selectable */ + { RP_ID(0x0046), RP_CAP(4, 0) }, /* INF Quad, ext interface */ + { RP_ID(0x0047), RP_CAP(4, 0) }, /* INF Quad, RJ45 */ + { RP_ID(0x004a), RP_CAP(4, 0) }, /* INF Plus, Quad */ + { RP_ID(0x004b), RP_CAP(8, 0) }, /* INF Plus, Octa */ + { RP_ID(0x004c), RP_CAP(8, 0) }, /* INF III, Octa */ + { RP_ID(0x004d), RP_CAP(4, 0) }, /* INF III, Quad */ + { RP_ID(0x004e), RP_CAP(2, 0) }, /* INF Plus, 2, RS232 */ + { RP_ID(0x004f), RP_CAP(2, 1) }, /* INF Plus, 2, SMPTE */ + { RP_ID(0x0050), RP_CAP(4, 0) }, /* INF Plus, Quad, RJ45 */ + { RP_ID(0x0051), RP_CAP(8, 0) }, /* INF Plus, Octa, RJ45 */ + { RP_ID(0x0052), RP_CAP(8, 1) }, /* INF Octa, SMPTE */ + + /* RocketPort EXPRESS cards */ + + { RP_ID(0x0060), RP_CAP(8, 0) }, /* EXP Octa, RJ45, selectable */ + { RP_ID(0x0061), RP_CAP(32, 0) }, /* EXP 32, ext interface */ + { RP_ID(0x0062), RP_CAP(8, 0) }, /* EXP Octa, ext interface */ + { RP_ID(0x0063), RP_CAP(16, 0) }, /* EXP 16, ext interface */ + { RP_ID(0x0064), RP_CAP(4, 0) }, /* EXP Quad, DB, selectable */ + { RP_ID(0x0065), RP_CAP(8, 0) }, /* EXP Octa, DB, selectable */ + { RP_ID(0x0066), RP_CAP(4, 0) }, /* EXP Quad, ext interface */ + { RP_ID(0x0067), RP_CAP(4, 0) }, /* EXP Quad, RJ45 */ + { RP_ID(0x0068), RP_CAP(8, 0) }, /* EXP Octa, RJ11 */ + { RP_ID(0x0072), RP_CAP(8, 1) }, /* EXP Octa, SMPTE */ + { } +}; +MODULE_DEVICE_TABLE(pci, rp2_pci_tbl); + +static struct pci_driver rp2_pci_driver = { + .name = DRV_NAME, + .id_table = rp2_pci_tbl, + .probe = rp2_probe, + .remove = rp2_remove, +}; + +static int __init rp2_uart_init(void) +{ + int rc; + + rc = uart_register_driver(&rp2_uart_driver); + if (rc) + return rc; + + rc = pci_register_driver(&rp2_pci_driver); + if (rc) { + uart_unregister_driver(&rp2_uart_driver); + return rc; + } + + return 0; +} + +static void __exit rp2_uart_exit(void) +{ + pci_unregister_driver(&rp2_pci_driver); + uart_unregister_driver(&rp2_uart_driver); +} + +module_init(rp2_uart_init); +module_exit(rp2_uart_exit); + +MODULE_DESCRIPTION("Comtrol RocketPort EXPRESS/INFINITY driver"); +MODULE_AUTHOR("Kevin Cernekee "); +MODULE_LICENSE("GPL v2"); +MODULE_FIRMWARE(RP2_FW_NAME); diff --git a/include/uapi/linux/serial_core.h b/include/uapi/linux/serial_core.h index 78f99d97475b..9dd47a569726 100644 --- a/include/uapi/linux/serial_core.h +++ b/include/uapi/linux/serial_core.h @@ -219,4 +219,7 @@ /* ARC (Synopsys) on-chip UART */ #define PORT_ARC 101 +/* Rocketport EXPRESS/INFINITY */ +#define PORT_RP2 102 + #endif /* _UAPILINUX_SERIAL_CORE_H */ -- cgit v1.2.3 From e6f30c731718db45cec380964dfee210307cfc4a Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Fri, 18 Jan 2013 07:17:30 +0000 Subject: netfilter: x_tables: add xt_bpf match Support arbitrary linux socket filter (BPF) programs as x_tables match rules. This allows for very expressive filters, and on platforms with BPF JIT appears competitive with traditional hardcoded iptables rules using the u32 match. The size of the filter has been artificially limited to 64 instructions maximum to avoid bloating the size of each rule using this new match. Signed-off-by: Willem de Bruijn Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/xt_bpf.h | 17 ++++++++ net/netfilter/Kconfig | 9 +++++ net/netfilter/Makefile | 1 + net/netfilter/xt_bpf.c | 73 +++++++++++++++++++++++++++++++++++ 4 files changed, 100 insertions(+) create mode 100644 include/uapi/linux/netfilter/xt_bpf.h create mode 100644 net/netfilter/xt_bpf.c (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netfilter/xt_bpf.h b/include/uapi/linux/netfilter/xt_bpf.h new file mode 100644 index 000000000000..5dda450eb55b --- /dev/null +++ b/include/uapi/linux/netfilter/xt_bpf.h @@ -0,0 +1,17 @@ +#ifndef _XT_BPF_H +#define _XT_BPF_H + +#include +#include + +#define XT_BPF_MAX_NUM_INSTR 64 + +struct xt_bpf_info { + __u16 bpf_program_num_elem; + struct sock_filter bpf_program[XT_BPF_MAX_NUM_INSTR]; + + /* only used in the kernel */ + struct sk_filter *filter __attribute__((aligned(8))); +}; + +#endif /*_XT_BPF_H */ diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index bb48607d4ee4..eb2c8ebf6d99 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -811,6 +811,15 @@ config NETFILTER_XT_MATCH_ADDRTYPE If you want to compile it as a module, say M here and read . If unsure, say `N'. +config NETFILTER_XT_MATCH_BPF + tristate '"bpf" match support' + depends on NETFILTER_ADVANCED + help + BPF matching applies a linux socket filter to each packet and + accepts those for which the filter returns non-zero. + + To compile it as a module, choose M here. If unsure, say N. + config NETFILTER_XT_MATCH_CLUSTER tristate '"cluster" match support' depends on NF_CONNTRACK diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index b3bbda60945e..a1abf87d43bf 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -99,6 +99,7 @@ obj-$(CONFIG_NETFILTER_XT_TARGET_IDLETIMER) += xt_IDLETIMER.o # matches obj-$(CONFIG_NETFILTER_XT_MATCH_ADDRTYPE) += xt_addrtype.o +obj-$(CONFIG_NETFILTER_XT_MATCH_BPF) += xt_bpf.o obj-$(CONFIG_NETFILTER_XT_MATCH_CLUSTER) += xt_cluster.o obj-$(CONFIG_NETFILTER_XT_MATCH_COMMENT) += xt_comment.o obj-$(CONFIG_NETFILTER_XT_MATCH_CONNBYTES) += xt_connbytes.o diff --git a/net/netfilter/xt_bpf.c b/net/netfilter/xt_bpf.c new file mode 100644 index 000000000000..12d4da8e6c77 --- /dev/null +++ b/net/netfilter/xt_bpf.c @@ -0,0 +1,73 @@ +/* Xtables module to match packets using a BPF filter. + * Copyright 2013 Google Inc. + * Written by Willem de Bruijn + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include + +#include +#include + +MODULE_AUTHOR("Willem de Bruijn "); +MODULE_DESCRIPTION("Xtables: BPF filter match"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("ipt_bpf"); +MODULE_ALIAS("ip6t_bpf"); + +static int bpf_mt_check(const struct xt_mtchk_param *par) +{ + struct xt_bpf_info *info = par->matchinfo; + struct sock_fprog program; + + program.len = info->bpf_program_num_elem; + program.filter = (struct sock_filter __user *) info->bpf_program; + if (sk_unattached_filter_create(&info->filter, &program)) { + pr_info("bpf: check failed: parse error\n"); + return -EINVAL; + } + + return 0; +} + +static bool bpf_mt(const struct sk_buff *skb, struct xt_action_param *par) +{ + const struct xt_bpf_info *info = par->matchinfo; + + return SK_RUN_FILTER(info->filter, skb); +} + +static void bpf_mt_destroy(const struct xt_mtdtor_param *par) +{ + const struct xt_bpf_info *info = par->matchinfo; + sk_unattached_filter_destroy(info->filter); +} + +static struct xt_match bpf_mt_reg __read_mostly = { + .name = "bpf", + .revision = 0, + .family = NFPROTO_UNSPEC, + .checkentry = bpf_mt_check, + .match = bpf_mt, + .destroy = bpf_mt_destroy, + .matchsize = sizeof(struct xt_bpf_info), + .me = THIS_MODULE, +}; + +static int __init bpf_mt_init(void) +{ + return xt_register_match(&bpf_mt_reg); +} + +static void __exit bpf_mt_exit(void) +{ + xt_unregister_match(&bpf_mt_reg); +} + +module_init(bpf_mt_init); +module_exit(bpf_mt_exit); -- cgit v1.2.3 From e7db3cbcd6508235d63ba4a31bbd1ce4fdece6e1 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 21 Jan 2013 12:30:59 +0100 Subject: netfilter: add missing xt_bpf.h header in installation Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/Kbuild | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netfilter/Kbuild b/include/uapi/linux/netfilter/Kbuild index 08f555fef13f..8b4bd36a7a84 100644 --- a/include/uapi/linux/netfilter/Kbuild +++ b/include/uapi/linux/netfilter/Kbuild @@ -35,6 +35,7 @@ header-y += xt_TCPOPTSTRIP.h header-y += xt_TEE.h header-y += xt_TPROXY.h header-y += xt_addrtype.h +header-y += xt_bpf.h header-y += xt_cluster.h header-y += xt_comment.h header-y += xt_connbytes.h -- cgit v1.2.3 From 8a454ab95e5ccbffd04363e9c028f60739bc3fa4 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 21 Jan 2013 13:02:19 +0100 Subject: netfilter: add missing xt_connlabel.h header in installation In (c539f01 netfilter: add connlabel conntrack extension), it was missing the change to the Kbuild file to install the header in the system. Reported-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/Kbuild | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netfilter/Kbuild b/include/uapi/linux/netfilter/Kbuild index 8b4bd36a7a84..41115776d76f 100644 --- a/include/uapi/linux/netfilter/Kbuild +++ b/include/uapi/linux/netfilter/Kbuild @@ -39,6 +39,7 @@ header-y += xt_bpf.h header-y += xt_cluster.h header-y += xt_comment.h header-y += xt_connbytes.h +header-y += xt_connlabel.h header-y += xt_connlimit.h header-y += xt_connmark.h header-y += xt_conntrack.h -- cgit v1.2.3 From bbb923a4c2d17ebd5ec34755fe19a33914cbd86f Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Mon, 21 Jan 2013 06:00:25 +0000 Subject: mcast: define and use MRT[6]_MAX in ip[6]_mroute_opt() This will ease further addition of new MRT[6]_* values and avoid to update in6.h each time. Note that we reduce the maximum value from 210 to 209, but 210 does not match any known value in ip[6]_mroute_setsockopt(). Signed-off-by: Nicolas Dichtel Acked-by: David L Stevens Signed-off-by: David S. Miller --- include/linux/mroute.h | 2 +- include/linux/mroute6.h | 2 +- include/uapi/linux/in6.h | 15 ++++----------- include/uapi/linux/mroute.h | 1 + include/uapi/linux/mroute6.h | 1 + 5 files changed, 8 insertions(+), 13 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/linux/mroute.h b/include/linux/mroute.h index ea00d9162ee5..79aaa9fc1a15 100644 --- a/include/linux/mroute.h +++ b/include/linux/mroute.h @@ -9,7 +9,7 @@ #ifdef CONFIG_IP_MROUTE static inline int ip_mroute_opt(int opt) { - return (opt >= MRT_BASE) && (opt <= MRT_BASE + 10); + return (opt >= MRT_BASE) && (opt <= MRT_MAX); } #else static inline int ip_mroute_opt(int opt) diff --git a/include/linux/mroute6.h b/include/linux/mroute6.h index a223561ba12e..66982e764051 100644 --- a/include/linux/mroute6.h +++ b/include/linux/mroute6.h @@ -10,7 +10,7 @@ #ifdef CONFIG_IPV6_MROUTE static inline int ip6_mroute_opt(int opt) { - return (opt >= MRT6_BASE) && (opt <= MRT6_BASE + 10); + return (opt >= MRT6_BASE) && (opt <= MRT6_MAX); } #else static inline int ip6_mroute_opt(int opt) diff --git a/include/uapi/linux/in6.h b/include/uapi/linux/in6.h index 5673b97dcf54..53b1d56a6e7f 100644 --- a/include/uapi/linux/in6.h +++ b/include/uapi/linux/in6.h @@ -259,17 +259,10 @@ struct in6_flowlabel_req { /* * Multicast Routing: - * see include/linux/mroute6.h. + * see include/uapi/linux/mroute6.h. * - * MRT6_INIT 200 - * MRT6_DONE 201 - * MRT6_ADD_MIF 202 - * MRT6_DEL_MIF 203 - * MRT6_ADD_MFC 204 - * MRT6_DEL_MFC 205 - * MRT6_VERSION 206 - * MRT6_ASSERT 207 - * MRT6_PIM 208 - * (reserved) 209 + * MRT6_BASE 200 + * ... + * MRT6_MAX */ #endif /* _UAPI_LINUX_IN6_H */ diff --git a/include/uapi/linux/mroute.h b/include/uapi/linux/mroute.h index 16929993acc4..1c11004af5db 100644 --- a/include/uapi/linux/mroute.h +++ b/include/uapi/linux/mroute.h @@ -26,6 +26,7 @@ #define MRT_ASSERT (MRT_BASE+7) /* Activate PIM assert mode */ #define MRT_PIM (MRT_BASE+8) /* enable PIM code */ #define MRT_TABLE (MRT_BASE+9) /* Specify mroute table ID */ +#define MRT_MAX (MRT_BASE+9) #define SIOCGETVIFCNT SIOCPROTOPRIVATE /* IP protocol privates */ #define SIOCGETSGCNT (SIOCPROTOPRIVATE+1) diff --git a/include/uapi/linux/mroute6.h b/include/uapi/linux/mroute6.h index 3e89b5e7f9e3..c206ae3a2327 100644 --- a/include/uapi/linux/mroute6.h +++ b/include/uapi/linux/mroute6.h @@ -26,6 +26,7 @@ #define MRT6_ASSERT (MRT6_BASE+7) /* Activate PIM assert mode */ #define MRT6_PIM (MRT6_BASE+8) /* enable PIM code */ #define MRT6_TABLE (MRT6_BASE+9) /* Specify mroute table ID */ +#define MRT6_MAX (MRT6_BASE+9) #define SIOCGETMIFCNT_IN6 SIOCPROTOPRIVATE /* IP protocol privates */ #define SIOCGETSGCNT_IN6 (SIOCPROTOPRIVATE+1) -- cgit v1.2.3 From 660b26dc1a8aeb33c2a2246ebf1b3684449a74b7 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Mon, 21 Jan 2013 06:00:26 +0000 Subject: mcast: add multicast proxy support (IPv4 and IPv6) This patch add the support of proxy multicast, ie being able to build a static multicast tree. It adds the support of (*,*) and (*,G) entries. The user should define an (*,*) entry which is not used for real forwarding. This entry defines the upstream in iif and contains all interfaces from the static tree in its oifs. It will be used to forward packet upstream when they come from an interface belonging to the static tree. Hence, the user should define (*,G) entries to build its static tree. Note that upstream interface must be part of oifs: packets are sent to all oifs interfaces except the input interface. This ensures to always join the whole static tree, even if the packet is not coming from the upstream interface. Signed-off-by: Nicolas Dichtel Acked-by: David L Stevens Signed-off-by: David S. Miller --- include/uapi/linux/mroute.h | 4 +- include/uapi/linux/mroute6.h | 4 +- net/ipv4/ipmr.c | 119 +++++++++++++++++++++++++++++++++++----- net/ipv6/ip6mr.c | 126 ++++++++++++++++++++++++++++++++++++++----- 4 files changed, 225 insertions(+), 28 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/mroute.h b/include/uapi/linux/mroute.h index 1c11004af5db..a382d2c04a42 100644 --- a/include/uapi/linux/mroute.h +++ b/include/uapi/linux/mroute.h @@ -26,7 +26,9 @@ #define MRT_ASSERT (MRT_BASE+7) /* Activate PIM assert mode */ #define MRT_PIM (MRT_BASE+8) /* enable PIM code */ #define MRT_TABLE (MRT_BASE+9) /* Specify mroute table ID */ -#define MRT_MAX (MRT_BASE+9) +#define MRT_ADD_MFC_PROXY (MRT_BASE+10) /* Add a (*,*|G) mfc entry */ +#define MRT_DEL_MFC_PROXY (MRT_BASE+11) /* Del a (*,*|G) mfc entry */ +#define MRT_MAX (MRT_BASE+11) #define SIOCGETVIFCNT SIOCPROTOPRIVATE /* IP protocol privates */ #define SIOCGETSGCNT (SIOCPROTOPRIVATE+1) diff --git a/include/uapi/linux/mroute6.h b/include/uapi/linux/mroute6.h index c206ae3a2327..ce91215cf7e6 100644 --- a/include/uapi/linux/mroute6.h +++ b/include/uapi/linux/mroute6.h @@ -26,7 +26,9 @@ #define MRT6_ASSERT (MRT6_BASE+7) /* Activate PIM assert mode */ #define MRT6_PIM (MRT6_BASE+8) /* enable PIM code */ #define MRT6_TABLE (MRT6_BASE+9) /* Specify mroute table ID */ -#define MRT6_MAX (MRT6_BASE+9) +#define MRT6_ADD_MFC_PROXY (MRT6_BASE+10) /* Add a (*,*|G) mfc entry */ +#define MRT6_DEL_MFC_PROXY (MRT6_BASE+11) /* Del a (*,*|G) mfc entry */ +#define MRT6_MAX (MRT6_BASE+11) #define SIOCGETMIFCNT_IN6 SIOCPROTOPRIVATE /* IP protocol privates */ #define SIOCGETSGCNT_IN6 (SIOCPROTOPRIVATE+1) diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index a9454cbd953c..4b5e22670d44 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -828,6 +828,49 @@ static struct mfc_cache *ipmr_cache_find(struct mr_table *mrt, return NULL; } +/* Look for a (*,*,oif) entry */ +static struct mfc_cache *ipmr_cache_find_any_parent(struct mr_table *mrt, + int vifi) +{ + int line = MFC_HASH(INADDR_ANY, INADDR_ANY); + struct mfc_cache *c; + + list_for_each_entry_rcu(c, &mrt->mfc_cache_array[line], list) + if (c->mfc_origin == INADDR_ANY && + c->mfc_mcastgrp == INADDR_ANY && + c->mfc_un.res.ttls[vifi] < 255) + return c; + + return NULL; +} + +/* Look for a (*,G) entry */ +static struct mfc_cache *ipmr_cache_find_any(struct mr_table *mrt, + __be32 mcastgrp, int vifi) +{ + int line = MFC_HASH(mcastgrp, INADDR_ANY); + struct mfc_cache *c, *proxy; + + if (mcastgrp == INADDR_ANY) + goto skip; + + list_for_each_entry_rcu(c, &mrt->mfc_cache_array[line], list) + if (c->mfc_origin == INADDR_ANY && + c->mfc_mcastgrp == mcastgrp) { + if (c->mfc_un.res.ttls[vifi] < 255) + return c; + + /* It's ok if the vifi is part of the static tree */ + proxy = ipmr_cache_find_any_parent(mrt, + c->mfc_parent); + if (proxy && proxy->mfc_un.res.ttls[vifi] < 255) + return c; + } + +skip: + return ipmr_cache_find_any_parent(mrt, vifi); +} + /* * Allocate a multicast cache entry */ @@ -1053,7 +1096,7 @@ ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi, struct sk_buff *skb) * MFC cache manipulation by user space mroute daemon */ -static int ipmr_mfc_delete(struct mr_table *mrt, struct mfcctl *mfc) +static int ipmr_mfc_delete(struct mr_table *mrt, struct mfcctl *mfc, int parent) { int line; struct mfc_cache *c, *next; @@ -1062,7 +1105,8 @@ static int ipmr_mfc_delete(struct mr_table *mrt, struct mfcctl *mfc) list_for_each_entry_safe(c, next, &mrt->mfc_cache_array[line], list) { if (c->mfc_origin == mfc->mfcc_origin.s_addr && - c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) { + c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr && + (parent == -1 || parent == c->mfc_parent)) { list_del_rcu(&c->list); mroute_netlink_event(mrt, c, RTM_DELROUTE); ipmr_cache_free(c); @@ -1073,7 +1117,7 @@ static int ipmr_mfc_delete(struct mr_table *mrt, struct mfcctl *mfc) } static int ipmr_mfc_add(struct net *net, struct mr_table *mrt, - struct mfcctl *mfc, int mrtsock) + struct mfcctl *mfc, int mrtsock, int parent) { bool found = false; int line; @@ -1086,7 +1130,8 @@ static int ipmr_mfc_add(struct net *net, struct mr_table *mrt, list_for_each_entry(c, &mrt->mfc_cache_array[line], list) { if (c->mfc_origin == mfc->mfcc_origin.s_addr && - c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) { + c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr && + (parent == -1 || parent == c->mfc_parent)) { found = true; break; } @@ -1103,7 +1148,8 @@ static int ipmr_mfc_add(struct net *net, struct mr_table *mrt, return 0; } - if (!ipv4_is_multicast(mfc->mfcc_mcastgrp.s_addr)) + if (mfc->mfcc_mcastgrp.s_addr != INADDR_ANY && + !ipv4_is_multicast(mfc->mfcc_mcastgrp.s_addr)) return -EINVAL; c = ipmr_cache_alloc(); @@ -1218,7 +1264,7 @@ static void mrtsock_destruct(struct sock *sk) int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsigned int optlen) { - int ret; + int ret, parent = 0; struct vifctl vif; struct mfcctl mfc; struct net *net = sock_net(sk); @@ -1287,16 +1333,22 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi */ case MRT_ADD_MFC: case MRT_DEL_MFC: + parent = -1; + case MRT_ADD_MFC_PROXY: + case MRT_DEL_MFC_PROXY: if (optlen != sizeof(mfc)) return -EINVAL; if (copy_from_user(&mfc, optval, sizeof(mfc))) return -EFAULT; + if (parent == 0) + parent = mfc.mfcc_parent; rtnl_lock(); - if (optname == MRT_DEL_MFC) - ret = ipmr_mfc_delete(mrt, &mfc); + if (optname == MRT_DEL_MFC || optname == MRT_DEL_MFC_PROXY) + ret = ipmr_mfc_delete(mrt, &mfc, parent); else ret = ipmr_mfc_add(net, mrt, &mfc, - sk == rtnl_dereference(mrt->mroute_sk)); + sk == rtnl_dereference(mrt->mroute_sk), + parent); rtnl_unlock(); return ret; /* @@ -1749,17 +1801,28 @@ static int ip_mr_forward(struct net *net, struct mr_table *mrt, { int psend = -1; int vif, ct; + int true_vifi = ipmr_find_vif(mrt, skb->dev); vif = cache->mfc_parent; cache->mfc_un.res.pkt++; cache->mfc_un.res.bytes += skb->len; + if (cache->mfc_origin == INADDR_ANY && true_vifi >= 0) { + struct mfc_cache *cache_proxy; + + /* For an (*,G) entry, we only check that the incomming + * interface is part of the static tree. + */ + cache_proxy = ipmr_cache_find_any_parent(mrt, vif); + if (cache_proxy && + cache_proxy->mfc_un.res.ttls[true_vifi] < 255) + goto forward; + } + /* * Wrong interface: drop packet and (maybe) send PIM assert. */ if (mrt->vif_table[vif].dev != skb->dev) { - int true_vifi; - if (rt_is_output_route(skb_rtable(skb))) { /* It is our own packet, looped back. * Very complicated situation... @@ -1776,7 +1839,6 @@ static int ip_mr_forward(struct net *net, struct mr_table *mrt, } cache->mfc_un.res.wrong_if++; - true_vifi = ipmr_find_vif(mrt, skb->dev); if (true_vifi >= 0 && mrt->mroute_do_assert && /* pimsm uses asserts, when switching from RPT to SPT, @@ -1794,15 +1856,33 @@ static int ip_mr_forward(struct net *net, struct mr_table *mrt, goto dont_forward; } +forward: mrt->vif_table[vif].pkt_in++; mrt->vif_table[vif].bytes_in += skb->len; /* * Forward the frame */ + if (cache->mfc_origin == INADDR_ANY && + cache->mfc_mcastgrp == INADDR_ANY) { + if (true_vifi >= 0 && + true_vifi != cache->mfc_parent && + ip_hdr(skb)->ttl > + cache->mfc_un.res.ttls[cache->mfc_parent]) { + /* It's an (*,*) entry and the packet is not coming from + * the upstream: forward the packet to the upstream + * only. + */ + psend = cache->mfc_parent; + goto last_forward; + } + goto dont_forward; + } for (ct = cache->mfc_un.res.maxvif - 1; ct >= cache->mfc_un.res.minvif; ct--) { - if (ip_hdr(skb)->ttl > cache->mfc_un.res.ttls[ct]) { + /* For (*,G) entry, don't forward to the incoming interface */ + if ((cache->mfc_origin != INADDR_ANY || ct != true_vifi) && + ip_hdr(skb)->ttl > cache->mfc_un.res.ttls[ct]) { if (psend != -1) { struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); @@ -1813,6 +1893,7 @@ static int ip_mr_forward(struct net *net, struct mr_table *mrt, psend = ct; } } +last_forward: if (psend != -1) { if (local) { struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); @@ -1902,6 +1983,13 @@ int ip_mr_input(struct sk_buff *skb) /* already under rcu_read_lock() */ cache = ipmr_cache_find(mrt, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr); + if (cache == NULL) { + int vif = ipmr_find_vif(mrt, skb->dev); + + if (vif >= 0) + cache = ipmr_cache_find_any(mrt, ip_hdr(skb)->daddr, + vif); + } /* * No usable cache entry @@ -2107,7 +2195,12 @@ int ipmr_get_route(struct net *net, struct sk_buff *skb, rcu_read_lock(); cache = ipmr_cache_find(mrt, saddr, daddr); + if (cache == NULL && skb->dev) { + int vif = ipmr_find_vif(mrt, skb->dev); + if (vif >= 0) + cache = ipmr_cache_find_any(mrt, daddr, vif); + } if (cache == NULL) { struct sk_buff *skb2; struct iphdr *iph; diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 26dcdec9e3a5..acc32494006a 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -1017,6 +1017,50 @@ static struct mfc6_cache *ip6mr_cache_find(struct mr6_table *mrt, return NULL; } +/* Look for a (*,*,oif) entry */ +static struct mfc6_cache *ip6mr_cache_find_any_parent(struct mr6_table *mrt, + mifi_t mifi) +{ + int line = MFC6_HASH(&in6addr_any, &in6addr_any); + struct mfc6_cache *c; + + list_for_each_entry(c, &mrt->mfc6_cache_array[line], list) + if (ipv6_addr_any(&c->mf6c_origin) && + ipv6_addr_any(&c->mf6c_mcastgrp) && + (c->mfc_un.res.ttls[mifi] < 255)) + return c; + + return NULL; +} + +/* Look for a (*,G) entry */ +static struct mfc6_cache *ip6mr_cache_find_any(struct mr6_table *mrt, + struct in6_addr *mcastgrp, + mifi_t mifi) +{ + int line = MFC6_HASH(mcastgrp, &in6addr_any); + struct mfc6_cache *c, *proxy; + + if (ipv6_addr_any(mcastgrp)) + goto skip; + + list_for_each_entry(c, &mrt->mfc6_cache_array[line], list) + if (ipv6_addr_any(&c->mf6c_origin) && + ipv6_addr_equal(&c->mf6c_mcastgrp, mcastgrp)) { + if (c->mfc_un.res.ttls[mifi] < 255) + return c; + + /* It's ok if the mifi is part of the static tree */ + proxy = ip6mr_cache_find_any_parent(mrt, + c->mf6c_parent); + if (proxy && proxy->mfc_un.res.ttls[mifi] < 255) + return c; + } + +skip: + return ip6mr_cache_find_any_parent(mrt, mifi); +} + /* * Allocate a multicast cache entry */ @@ -1247,7 +1291,8 @@ ip6mr_cache_unresolved(struct mr6_table *mrt, mifi_t mifi, struct sk_buff *skb) * MFC6 cache manipulation by user space */ -static int ip6mr_mfc_delete(struct mr6_table *mrt, struct mf6cctl *mfc) +static int ip6mr_mfc_delete(struct mr6_table *mrt, struct mf6cctl *mfc, + int parent) { int line; struct mfc6_cache *c, *next; @@ -1256,7 +1301,9 @@ static int ip6mr_mfc_delete(struct mr6_table *mrt, struct mf6cctl *mfc) list_for_each_entry_safe(c, next, &mrt->mfc6_cache_array[line], list) { if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) && - ipv6_addr_equal(&c->mf6c_mcastgrp, &mfc->mf6cc_mcastgrp.sin6_addr)) { + ipv6_addr_equal(&c->mf6c_mcastgrp, + &mfc->mf6cc_mcastgrp.sin6_addr) && + (parent == -1 || parent == c->mf6c_parent)) { write_lock_bh(&mrt_lock); list_del(&c->list); write_unlock_bh(&mrt_lock); @@ -1391,7 +1438,7 @@ void ip6_mr_cleanup(void) } static int ip6mr_mfc_add(struct net *net, struct mr6_table *mrt, - struct mf6cctl *mfc, int mrtsock) + struct mf6cctl *mfc, int mrtsock, int parent) { bool found = false; int line; @@ -1413,7 +1460,9 @@ static int ip6mr_mfc_add(struct net *net, struct mr6_table *mrt, list_for_each_entry(c, &mrt->mfc6_cache_array[line], list) { if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) && - ipv6_addr_equal(&c->mf6c_mcastgrp, &mfc->mf6cc_mcastgrp.sin6_addr)) { + ipv6_addr_equal(&c->mf6c_mcastgrp, + &mfc->mf6cc_mcastgrp.sin6_addr) && + (parent == -1 || parent == mfc->mf6cc_parent)) { found = true; break; } @@ -1430,7 +1479,8 @@ static int ip6mr_mfc_add(struct net *net, struct mr6_table *mrt, return 0; } - if (!ipv6_addr_is_multicast(&mfc->mf6cc_mcastgrp.sin6_addr)) + if (!ipv6_addr_any(&mfc->mf6cc_mcastgrp.sin6_addr) && + !ipv6_addr_is_multicast(&mfc->mf6cc_mcastgrp.sin6_addr)) return -EINVAL; c = ip6mr_cache_alloc(); @@ -1596,7 +1646,7 @@ struct sock *mroute6_socket(struct net *net, struct sk_buff *skb) int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsigned int optlen) { - int ret; + int ret, parent = 0; struct mif6ctl vif; struct mf6cctl mfc; mifi_t mifi; @@ -1653,15 +1703,21 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns */ case MRT6_ADD_MFC: case MRT6_DEL_MFC: + parent = -1; + case MRT6_ADD_MFC_PROXY: + case MRT6_DEL_MFC_PROXY: if (optlen < sizeof(mfc)) return -EINVAL; if (copy_from_user(&mfc, optval, sizeof(mfc))) return -EFAULT; + if (parent == 0) + parent = mfc.mf6cc_parent; rtnl_lock(); - if (optname == MRT6_DEL_MFC) - ret = ip6mr_mfc_delete(mrt, &mfc); + if (optname == MRT6_DEL_MFC || optname == MRT6_DEL_MFC_PROXY) + ret = ip6mr_mfc_delete(mrt, &mfc, parent); else - ret = ip6mr_mfc_add(net, mrt, &mfc, sk == mrt->mroute6_sk); + ret = ip6mr_mfc_add(net, mrt, &mfc, + sk == mrt->mroute6_sk, parent); rtnl_unlock(); return ret; @@ -2015,19 +2071,29 @@ static int ip6_mr_forward(struct net *net, struct mr6_table *mrt, { int psend = -1; int vif, ct; + int true_vifi = ip6mr_find_vif(mrt, skb->dev); vif = cache->mf6c_parent; cache->mfc_un.res.pkt++; cache->mfc_un.res.bytes += skb->len; + if (ipv6_addr_any(&cache->mf6c_origin) && true_vifi >= 0) { + struct mfc6_cache *cache_proxy; + + /* For an (*,G) entry, we only check that the incomming + * interface is part of the static tree. + */ + cache_proxy = ip6mr_cache_find_any_parent(mrt, vif); + if (cache_proxy && + cache_proxy->mfc_un.res.ttls[true_vifi] < 255) + goto forward; + } + /* * Wrong interface: drop packet and (maybe) send PIM assert. */ if (mrt->vif6_table[vif].dev != skb->dev) { - int true_vifi; - cache->mfc_un.res.wrong_if++; - true_vifi = ip6mr_find_vif(mrt, skb->dev); if (true_vifi >= 0 && mrt->mroute_do_assert && /* pimsm uses asserts, when switching from RPT to SPT, @@ -2045,14 +2111,32 @@ static int ip6_mr_forward(struct net *net, struct mr6_table *mrt, goto dont_forward; } +forward: mrt->vif6_table[vif].pkt_in++; mrt->vif6_table[vif].bytes_in += skb->len; /* * Forward the frame */ + if (ipv6_addr_any(&cache->mf6c_origin) && + ipv6_addr_any(&cache->mf6c_mcastgrp)) { + if (true_vifi >= 0 && + true_vifi != cache->mf6c_parent && + ipv6_hdr(skb)->hop_limit > + cache->mfc_un.res.ttls[cache->mf6c_parent]) { + /* It's an (*,*) entry and the packet is not coming from + * the upstream: forward the packet to the upstream + * only. + */ + psend = cache->mf6c_parent; + goto last_forward; + } + goto dont_forward; + } for (ct = cache->mfc_un.res.maxvif - 1; ct >= cache->mfc_un.res.minvif; ct--) { - if (ipv6_hdr(skb)->hop_limit > cache->mfc_un.res.ttls[ct]) { + /* For (*,G) entry, don't forward to the incoming interface */ + if ((!ipv6_addr_any(&cache->mf6c_origin) || ct != true_vifi) && + ipv6_hdr(skb)->hop_limit > cache->mfc_un.res.ttls[ct]) { if (psend != -1) { struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); if (skb2) @@ -2061,6 +2145,7 @@ static int ip6_mr_forward(struct net *net, struct mr6_table *mrt, psend = ct; } } +last_forward: if (psend != -1) { ip6mr_forward2(net, mrt, skb, cache, psend); return 0; @@ -2096,6 +2181,14 @@ int ip6_mr_input(struct sk_buff *skb) read_lock(&mrt_lock); cache = ip6mr_cache_find(mrt, &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr); + if (cache == NULL) { + int vif = ip6mr_find_vif(mrt, skb->dev); + + if (vif >= 0) + cache = ip6mr_cache_find_any(mrt, + &ipv6_hdr(skb)->daddr, + vif); + } /* * No usable cache entry @@ -2183,6 +2276,13 @@ int ip6mr_get_route(struct net *net, read_lock(&mrt_lock); cache = ip6mr_cache_find(mrt, &rt->rt6i_src.addr, &rt->rt6i_dst.addr); + if (!cache && skb->dev) { + int vif = ip6mr_find_vif(mrt, skb->dev); + + if (vif >= 0) + cache = ip6mr_cache_find_any(mrt, &rt->rt6i_dst.addr, + vif); + } if (!cache) { struct sk_buff *skb2; -- cgit v1.2.3 From 7e58d5aea8abb993983a3f3088fd4a3f06180a1c Mon Sep 17 00:00:00 2001 From: Amos Kong Date: Mon, 21 Jan 2013 01:17:23 +0000 Subject: virtio-net: introduce a new control to set macaddr Currently we write MAC address to pci config space byte by byte, this means that we have an intermediate step where mac is wrong. This patch introduced a new control command to set MAC address, it's atomic. VIRTIO_NET_F_CTRL_MAC_ADDR is a new feature bit for compatibility. Signed-off-by: Amos Kong Signed-off-by: David S. Miller --- drivers/net/virtio_net.c | 21 ++++++++++++++++++--- include/uapi/linux/virtio_net.h | 8 +++++++- 2 files changed, 25 insertions(+), 4 deletions(-) (limited to 'include/uapi/linux') diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 395ab4ff3e64..701408a1ded6 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -802,14 +802,28 @@ static int virtnet_set_mac_address(struct net_device *dev, void *p) struct virtnet_info *vi = netdev_priv(dev); struct virtio_device *vdev = vi->vdev; int ret; + struct sockaddr *addr = p; + struct scatterlist sg; - ret = eth_mac_addr(dev, p); + ret = eth_prepare_mac_addr_change(dev, p); if (ret) return ret; - if (virtio_has_feature(vdev, VIRTIO_NET_F_MAC)) + if (virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR)) { + sg_init_one(&sg, addr->sa_data, dev->addr_len); + if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MAC, + VIRTIO_NET_CTRL_MAC_ADDR_SET, + &sg, 1, 0)) { + dev_warn(&vdev->dev, + "Failed to set mac address by vq command.\n"); + return -EINVAL; + } + } else if (virtio_has_feature(vdev, VIRTIO_NET_F_MAC)) { vdev->config->set(vdev, offsetof(struct virtio_net_config, mac), - dev->dev_addr, dev->addr_len); + addr->sa_data, dev->addr_len); + } + + eth_commit_mac_addr_change(dev, p); return 0; } @@ -1627,6 +1641,7 @@ static unsigned int features[] = { VIRTIO_NET_F_MRG_RXBUF, VIRTIO_NET_F_STATUS, VIRTIO_NET_F_CTRL_VQ, VIRTIO_NET_F_CTRL_RX, VIRTIO_NET_F_CTRL_VLAN, VIRTIO_NET_F_GUEST_ANNOUNCE, VIRTIO_NET_F_MQ, + VIRTIO_NET_F_CTRL_MAC_ADDR, }; static struct virtio_driver virtio_net_driver = { diff --git a/include/uapi/linux/virtio_net.h b/include/uapi/linux/virtio_net.h index 848e3584d7c8..a5a8c88753b9 100644 --- a/include/uapi/linux/virtio_net.h +++ b/include/uapi/linux/virtio_net.h @@ -53,6 +53,7 @@ * network */ #define VIRTIO_NET_F_MQ 22 /* Device supports Receive Flow * Steering */ +#define VIRTIO_NET_F_CTRL_MAC_ADDR 23 /* Set MAC address */ #define VIRTIO_NET_S_LINK_UP 1 /* Link is up */ #define VIRTIO_NET_S_ANNOUNCE 2 /* Announcement is needed */ @@ -127,7 +128,7 @@ typedef __u8 virtio_net_ctrl_ack; #define VIRTIO_NET_CTRL_RX_NOBCAST 5 /* - * Control the MAC filter table. + * Control the MAC * * The MAC filter table is managed by the hypervisor, the guest should * assume the size is infinite. Filtering should be considered @@ -140,6 +141,10 @@ typedef __u8 virtio_net_ctrl_ack; * first sg list contains unicast addresses, the second is for multicast. * This functionality is present if the VIRTIO_NET_F_CTRL_RX feature * is available. + * + * The ADDR_SET command requests one out scatterlist, it contains a + * 6 bytes MAC address. This functionality is present if the + * VIRTIO_NET_F_CTRL_MAC_ADDR feature is available. */ struct virtio_net_ctrl_mac { __u32 entries; @@ -148,6 +153,7 @@ struct virtio_net_ctrl_mac { #define VIRTIO_NET_CTRL_MAC 1 #define VIRTIO_NET_CTRL_MAC_TABLE_SET 0 + #define VIRTIO_NET_CTRL_MAC_ADDR_SET 1 /* * Control VLAN filtering -- cgit v1.2.3 From afe759511808cd5bb508b598007cf0c7b0ca8e08 Mon Sep 17 00:00:00 2001 From: Aaron Lu Date: Tue, 15 Jan 2013 17:20:58 +0800 Subject: libata: identify and init ZPODD devices The ODD can be enabled for ZPODD if the following three conditions are satisfied: 1 The ODD supports device attention; 2 The platform can runtime power off the ODD through ACPI; 3 The ODD is either slot type or drawer type. For such ODDs, zpodd_init is called and a new structure is allocated for it to store ZPODD related stuffs. And the zpodd_dev_enabled function is used to test if ZPODD is currently enabled for this ODD. A new config CONFIG_SATA_ZPODD is added to selectively build ZPODD code. Signed-off-by: Aaron Lu Acked-by: Tejun Heo Signed-off-by: Jeff Garzik --- drivers/ata/Kconfig | 13 ++++++ drivers/ata/Makefile | 1 + drivers/ata/libata-core.c | 4 +- drivers/ata/libata-scsi.c | 2 + drivers/ata/libata-zpodd.c | 100 +++++++++++++++++++++++++++++++++++++++++++++ drivers/ata/libata.h | 14 +++++++ include/linux/libata.h | 3 ++ include/uapi/linux/cdrom.h | 34 +++++++++++++++ 8 files changed, 170 insertions(+), 1 deletion(-) create mode 100644 drivers/ata/libata-zpodd.c (limited to 'include/uapi/linux') diff --git a/drivers/ata/Kconfig b/drivers/ata/Kconfig index e08d322d01d7..996d16c9c6e5 100644 --- a/drivers/ata/Kconfig +++ b/drivers/ata/Kconfig @@ -58,6 +58,19 @@ config ATA_ACPI You can disable this at kernel boot time by using the option libata.noacpi=1 +config SATA_ZPODD + bool "SATA Zero Power ODD Support" + depends on ATA_ACPI + default n + help + This option adds support for SATA ZPODD. It requires both + ODD and the platform support, and if enabled, will automatically + power on/off the ODD when certain condition is satisfied. This + does not impact user's experience of the ODD, only power is saved + when ODD is not in use(i.e. no disc inside). + + If unsure, say N. + config SATA_PMP bool "SATA Port Multiplier support" default y diff --git a/drivers/ata/Makefile b/drivers/ata/Makefile index 9329dafba91b..85e3de463ed1 100644 --- a/drivers/ata/Makefile +++ b/drivers/ata/Makefile @@ -107,3 +107,4 @@ libata-y := libata-core.o libata-scsi.o libata-eh.o libata-transport.o libata-$(CONFIG_ATA_SFF) += libata-sff.o libata-$(CONFIG_SATA_PMP) += libata-pmp.o libata-$(CONFIG_ATA_ACPI) += libata-acpi.o +libata-$(CONFIG_SATA_ZPODD) += libata-zpodd.o diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 275941b576a8..c7ecd8492f1e 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -2401,8 +2401,10 @@ int ata_dev_configure(struct ata_device *dev) dma_dir_string = ", DMADIR"; } - if (ata_id_has_da(dev->id)) + if (ata_id_has_da(dev->id)) { dev->flags |= ATA_DFLAG_DA; + zpodd_init(dev); + } /* print device info to dmesg */ if (ata_msg_drv(ap) && print_info) diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index 7c337e754dab..1ff018525e3b 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -3755,6 +3755,8 @@ static void ata_scsi_remove_dev(struct ata_device *dev) mutex_lock(&ap->scsi_host->scan_mutex); spin_lock_irqsave(ap->lock, flags); + if (zpodd_dev_enabled(dev)) + zpodd_exit(dev); ata_acpi_unbind(dev); /* clearing dev->sdev is protected by host lock */ diff --git a/drivers/ata/libata-zpodd.c b/drivers/ata/libata-zpodd.c new file mode 100644 index 000000000000..27eed2f09a8a --- /dev/null +++ b/drivers/ata/libata-zpodd.c @@ -0,0 +1,100 @@ +#include +#include + +#include "libata.h" + +enum odd_mech_type { + ODD_MECH_TYPE_SLOT, + ODD_MECH_TYPE_DRAWER, + ODD_MECH_TYPE_UNSUPPORTED, +}; + +struct zpodd { + enum odd_mech_type mech_type; /* init during probe, RO afterwards */ + struct ata_device *dev; +}; + +/* Per the spec, only slot type and drawer type ODD can be supported */ +static enum odd_mech_type zpodd_get_mech_type(struct ata_device *dev) +{ + char buf[16]; + unsigned int ret; + struct rm_feature_desc *desc = (void *)(buf + 8); + struct ata_taskfile tf = {}; + + char cdb[] = { GPCMD_GET_CONFIGURATION, + 2, /* only 1 feature descriptor requested */ + 0, 3, /* 3, removable medium feature */ + 0, 0, 0,/* reserved */ + 0, sizeof(buf), + 0, 0, 0, + }; + + tf.flags = ATA_TFLAG_ISADDR | ATA_TFLAG_DEVICE; + tf.command = ATA_CMD_PACKET; + tf.protocol = ATAPI_PROT_PIO; + tf.lbam = sizeof(buf); + + ret = ata_exec_internal(dev, &tf, cdb, DMA_FROM_DEVICE, + buf, sizeof(buf), 0); + if (ret) + return ODD_MECH_TYPE_UNSUPPORTED; + + if (be16_to_cpu(desc->feature_code) != 3) + return ODD_MECH_TYPE_UNSUPPORTED; + + if (desc->mech_type == 0 && desc->load == 0 && desc->eject == 1) + return ODD_MECH_TYPE_SLOT; + else if (desc->mech_type == 1 && desc->load == 0 && desc->eject == 1) + return ODD_MECH_TYPE_DRAWER; + else + return ODD_MECH_TYPE_UNSUPPORTED; +} + +static bool odd_can_poweroff(struct ata_device *ata_dev) +{ + acpi_handle handle; + acpi_status status; + struct acpi_device *acpi_dev; + + handle = ata_dev_acpi_handle(ata_dev); + if (!handle) + return false; + + status = acpi_bus_get_device(handle, &acpi_dev); + if (ACPI_FAILURE(status)) + return false; + + return acpi_device_can_poweroff(acpi_dev); +} + +void zpodd_init(struct ata_device *dev) +{ + enum odd_mech_type mech_type; + struct zpodd *zpodd; + + if (dev->zpodd) + return; + + if (!odd_can_poweroff(dev)) + return; + + mech_type = zpodd_get_mech_type(dev); + if (mech_type == ODD_MECH_TYPE_UNSUPPORTED) + return; + + zpodd = kzalloc(sizeof(struct zpodd), GFP_KERNEL); + if (!zpodd) + return; + + zpodd->mech_type = mech_type; + + zpodd->dev = dev; + dev->zpodd = zpodd; +} + +void zpodd_exit(struct ata_device *dev) +{ + kfree(dev->zpodd); + dev->zpodd = NULL; +} diff --git a/drivers/ata/libata.h b/drivers/ata/libata.h index 7148a58020b9..a21740b4ee11 100644 --- a/drivers/ata/libata.h +++ b/drivers/ata/libata.h @@ -230,4 +230,18 @@ static inline void ata_sff_exit(void) { } #endif /* CONFIG_ATA_SFF */ +/* libata-zpodd.c */ +#ifdef CONFIG_SATA_ZPODD +void zpodd_init(struct ata_device *dev); +void zpodd_exit(struct ata_device *dev); +static inline bool zpodd_dev_enabled(struct ata_device *dev) +{ + return dev->zpodd != NULL; +} +#else /* CONFIG_SATA_ZPODD */ +static inline void zpodd_init(struct ata_device *dev) {} +static inline void zpodd_exit(struct ata_device *dev) {} +static inline bool zpodd_dev_enabled(struct ata_device *dev) { return false; } +#endif /* CONFIG_SATA_ZPODD */ + #endif /* __LIBATA_H__ */ diff --git a/include/linux/libata.h b/include/linux/libata.h index 7ae207eb29a0..65ff67e34b77 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -620,6 +620,9 @@ struct ata_device { #ifdef CONFIG_ATA_ACPI union acpi_object *gtf_cache; unsigned int gtf_filter; +#endif +#ifdef CONFIG_SATA_ZPODD + void *zpodd; #endif struct device tdev; /* n_sector is CLEAR_BEGIN, read comment above CLEAR_BEGIN */ diff --git a/include/uapi/linux/cdrom.h b/include/uapi/linux/cdrom.h index 898b866b300c..bd17ad5aa06d 100644 --- a/include/uapi/linux/cdrom.h +++ b/include/uapi/linux/cdrom.h @@ -908,5 +908,39 @@ struct mode_page_header { __be16 desc_length; }; +/* removable medium feature descriptor */ +struct rm_feature_desc { + __be16 feature_code; +#if defined(__BIG_ENDIAN_BITFIELD) + __u8 reserved1:2; + __u8 feature_version:4; + __u8 persistent:1; + __u8 curr:1; +#elif defined(__LITTLE_ENDIAN_BITFIELD) + __u8 curr:1; + __u8 persistent:1; + __u8 feature_version:4; + __u8 reserved1:2; +#endif + __u8 add_len; +#if defined(__BIG_ENDIAN_BITFIELD) + __u8 mech_type:3; + __u8 load:1; + __u8 eject:1; + __u8 pvnt_jmpr:1; + __u8 dbml:1; + __u8 lock:1; +#elif defined(__LITTLE_ENDIAN_BITFIELD) + __u8 lock:1; + __u8 dbml:1; + __u8 pvnt_jmpr:1; + __u8 eject:1; + __u8 load:1; + __u8 mech_type:3; +#endif + __u8 reserved2; + __u8 reserved3; + __u8 reserved4; +}; #endif /* _UAPI_LINUX_CDROM_H */ -- cgit v1.2.3 From 749cf76c5a363e1383108a914ea09530bfa0bd43 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Sun, 20 Jan 2013 18:28:06 -0500 Subject: KVM: ARM: Initial skeleton to compile KVM support Targets KVM support for Cortex A-15 processors. Contains all the framework components, make files, header files, some tracing functionality, and basic user space API. Only supported core is Cortex-A15 for now. Most functionality is in arch/arm/kvm/* or arch/arm/include/asm/kvm_*.h. Reviewed-by: Will Deacon Reviewed-by: Marcelo Tosatti Signed-off-by: Rusty Russell Signed-off-by: Marc Zyngier Signed-off-by: Christoffer Dall --- Documentation/virtual/kvm/api.txt | 57 +++++- arch/arm/Kconfig | 2 + arch/arm/Makefile | 1 + arch/arm/include/asm/kvm_arm.h | 24 +++ arch/arm/include/asm/kvm_asm.h | 58 ++++++ arch/arm/include/asm/kvm_coproc.h | 24 +++ arch/arm/include/asm/kvm_emulate.h | 50 ++++++ arch/arm/include/asm/kvm_host.h | 114 ++++++++++++ arch/arm/include/uapi/asm/kvm.h | 106 +++++++++++ arch/arm/kvm/Kconfig | 55 ++++++ arch/arm/kvm/Makefile | 21 +++ arch/arm/kvm/arm.c | 350 +++++++++++++++++++++++++++++++++++++ arch/arm/kvm/coproc.c | 23 +++ arch/arm/kvm/emulate.c | 155 ++++++++++++++++ arch/arm/kvm/guest.c | 221 +++++++++++++++++++++++ arch/arm/kvm/init.S | 19 ++ arch/arm/kvm/interrupts.S | 19 ++ arch/arm/kvm/mmu.c | 17 ++ arch/arm/kvm/reset.c | 74 ++++++++ arch/arm/kvm/trace.h | 52 ++++++ include/uapi/linux/kvm.h | 7 + 21 files changed, 1445 insertions(+), 4 deletions(-) create mode 100644 arch/arm/include/asm/kvm_arm.h create mode 100644 arch/arm/include/asm/kvm_asm.h create mode 100644 arch/arm/include/asm/kvm_coproc.h create mode 100644 arch/arm/include/asm/kvm_emulate.h create mode 100644 arch/arm/include/asm/kvm_host.h create mode 100644 arch/arm/include/uapi/asm/kvm.h create mode 100644 arch/arm/kvm/Kconfig create mode 100644 arch/arm/kvm/Makefile create mode 100644 arch/arm/kvm/arm.c create mode 100644 arch/arm/kvm/coproc.c create mode 100644 arch/arm/kvm/emulate.c create mode 100644 arch/arm/kvm/guest.c create mode 100644 arch/arm/kvm/init.S create mode 100644 arch/arm/kvm/interrupts.S create mode 100644 arch/arm/kvm/mmu.c create mode 100644 arch/arm/kvm/reset.c create mode 100644 arch/arm/kvm/trace.h (limited to 'include/uapi/linux') diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index a4df5535996b..4237c27ea612 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -293,7 +293,7 @@ kvm_run' (see below). 4.11 KVM_GET_REGS Capability: basic -Architectures: all +Architectures: all except ARM Type: vcpu ioctl Parameters: struct kvm_regs (out) Returns: 0 on success, -1 on error @@ -314,7 +314,7 @@ struct kvm_regs { 4.12 KVM_SET_REGS Capability: basic -Architectures: all +Architectures: all except ARM Type: vcpu ioctl Parameters: struct kvm_regs (in) Returns: 0 on success, -1 on error @@ -600,7 +600,7 @@ struct kvm_fpu { 4.24 KVM_CREATE_IRQCHIP Capability: KVM_CAP_IRQCHIP -Architectures: x86, ia64 +Architectures: x86, ia64, ARM Type: vm ioctl Parameters: none Returns: 0 on success, -1 on error @@ -608,7 +608,8 @@ Returns: 0 on success, -1 on error Creates an interrupt controller model in the kernel. On x86, creates a virtual ioapic, a virtual PIC (two PICs, nested), and sets up future vcpus to have a local APIC. IRQ routing for GSIs 0-15 is set to both PIC and IOAPIC; GSI 16-23 -only go to the IOAPIC. On ia64, a IOSAPIC is created. +only go to the IOAPIC. On ia64, a IOSAPIC is created. On ARM, a GIC is +created. 4.25 KVM_IRQ_LINE @@ -1775,6 +1776,14 @@ registers, find a list below: PPC | KVM_REG_PPC_VPA_DTL | 128 PPC | KVM_REG_PPC_EPCR | 32 +ARM registers are mapped using the lower 32 bits. The upper 16 of that +is the register group type, or coprocessor number: + +ARM core registers have the following id bit patterns: + 0x4002 0000 0010 + + + 4.69 KVM_GET_ONE_REG Capability: KVM_CAP_ONE_REG @@ -2127,6 +2136,46 @@ written, then `n_invalid' invalid entries, invalidating any previously valid entries found. +4.77 KVM_ARM_VCPU_INIT + +Capability: basic +Architectures: arm +Type: vcpu ioctl +Parameters: struct struct kvm_vcpu_init (in) +Returns: 0 on success; -1 on error +Errors: +  EINVAL:    the target is unknown, or the combination of features is invalid. +  ENOENT:    a features bit specified is unknown. + +This tells KVM what type of CPU to present to the guest, and what +optional features it should have.  This will cause a reset of the cpu +registers to their initial values.  If this is not called, KVM_RUN will +return ENOEXEC for that vcpu. + +Note that because some registers reflect machine topology, all vcpus +should be created before this ioctl is invoked. + + +4.78 KVM_GET_REG_LIST + +Capability: basic +Architectures: arm +Type: vcpu ioctl +Parameters: struct kvm_reg_list (in/out) +Returns: 0 on success; -1 on error +Errors: +  E2BIG:     the reg index list is too big to fit in the array specified by +             the user (the number required will be written into n). + +struct kvm_reg_list { + __u64 n; /* number of registers in reg[] */ + __u64 reg[0]; +}; + +This ioctl returns the guest registers that are supported for the +KVM_GET_ONE_REG/KVM_SET_ONE_REG calls. + + 5. The kvm_run structure ------------------------ diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 67874b82a4ed..e0627cdbcda5 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -2322,3 +2322,5 @@ source "security/Kconfig" source "crypto/Kconfig" source "lib/Kconfig" + +source "arch/arm/kvm/Kconfig" diff --git a/arch/arm/Makefile b/arch/arm/Makefile index 30c443c406f3..4bcd2d6b0535 100644 --- a/arch/arm/Makefile +++ b/arch/arm/Makefile @@ -252,6 +252,7 @@ core-$(CONFIG_FPE_NWFPE) += arch/arm/nwfpe/ core-$(CONFIG_FPE_FASTFPE) += $(FASTFPE_OBJ) core-$(CONFIG_VFP) += arch/arm/vfp/ core-$(CONFIG_XEN) += arch/arm/xen/ +core-$(CONFIG_KVM_ARM_HOST) += arch/arm/kvm/ # If we have a machine-specific directory, then include it in the build. core-y += arch/arm/kernel/ arch/arm/mm/ arch/arm/common/ diff --git a/arch/arm/include/asm/kvm_arm.h b/arch/arm/include/asm/kvm_arm.h new file mode 100644 index 000000000000..dc678e193417 --- /dev/null +++ b/arch/arm/include/asm/kvm_arm.h @@ -0,0 +1,24 @@ +/* + * Copyright (C) 2012 - Virtual Open Systems and Columbia University + * Author: Christoffer Dall + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#ifndef __ARM_KVM_ARM_H__ +#define __ARM_KVM_ARM_H__ + +#include + +#endif /* __ARM_KVM_ARM_H__ */ diff --git a/arch/arm/include/asm/kvm_asm.h b/arch/arm/include/asm/kvm_asm.h new file mode 100644 index 000000000000..f9993e5fb695 --- /dev/null +++ b/arch/arm/include/asm/kvm_asm.h @@ -0,0 +1,58 @@ +/* + * Copyright (C) 2012 - Virtual Open Systems and Columbia University + * Author: Christoffer Dall + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#ifndef __ARM_KVM_ASM_H__ +#define __ARM_KVM_ASM_H__ + +/* 0 is reserved as an invalid value. */ +#define c0_MPIDR 1 /* MultiProcessor ID Register */ +#define c0_CSSELR 2 /* Cache Size Selection Register */ +#define c1_SCTLR 3 /* System Control Register */ +#define c1_ACTLR 4 /* Auxilliary Control Register */ +#define c1_CPACR 5 /* Coprocessor Access Control */ +#define c2_TTBR0 6 /* Translation Table Base Register 0 */ +#define c2_TTBR0_high 7 /* TTBR0 top 32 bits */ +#define c2_TTBR1 8 /* Translation Table Base Register 1 */ +#define c2_TTBR1_high 9 /* TTBR1 top 32 bits */ +#define c2_TTBCR 10 /* Translation Table Base Control R. */ +#define c3_DACR 11 /* Domain Access Control Register */ +#define c5_DFSR 12 /* Data Fault Status Register */ +#define c5_IFSR 13 /* Instruction Fault Status Register */ +#define c5_ADFSR 14 /* Auxilary Data Fault Status R */ +#define c5_AIFSR 15 /* Auxilary Instrunction Fault Status R */ +#define c6_DFAR 16 /* Data Fault Address Register */ +#define c6_IFAR 17 /* Instruction Fault Address Register */ +#define c9_L2CTLR 18 /* Cortex A15 L2 Control Register */ +#define c10_PRRR 19 /* Primary Region Remap Register */ +#define c10_NMRR 20 /* Normal Memory Remap Register */ +#define c12_VBAR 21 /* Vector Base Address Register */ +#define c13_CID 22 /* Context ID Register */ +#define c13_TID_URW 23 /* Thread ID, User R/W */ +#define c13_TID_URO 24 /* Thread ID, User R/O */ +#define c13_TID_PRIV 25 /* Thread ID, Privileged */ +#define NR_CP15_REGS 26 /* Number of regs (incl. invalid) */ + +#define ARM_EXCEPTION_RESET 0 +#define ARM_EXCEPTION_UNDEFINED 1 +#define ARM_EXCEPTION_SOFTWARE 2 +#define ARM_EXCEPTION_PREF_ABORT 3 +#define ARM_EXCEPTION_DATA_ABORT 4 +#define ARM_EXCEPTION_IRQ 5 +#define ARM_EXCEPTION_FIQ 6 + +#endif /* __ARM_KVM_ASM_H__ */ diff --git a/arch/arm/include/asm/kvm_coproc.h b/arch/arm/include/asm/kvm_coproc.h new file mode 100644 index 000000000000..b6d023deb426 --- /dev/null +++ b/arch/arm/include/asm/kvm_coproc.h @@ -0,0 +1,24 @@ +/* + * Copyright (C) 2012 Rusty Russell IBM Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#ifndef __ARM_KVM_COPROC_H__ +#define __ARM_KVM_COPROC_H__ +#include + +void kvm_reset_coprocs(struct kvm_vcpu *vcpu); + +#endif /* __ARM_KVM_COPROC_H__ */ diff --git a/arch/arm/include/asm/kvm_emulate.h b/arch/arm/include/asm/kvm_emulate.h new file mode 100644 index 000000000000..17dad674b90f --- /dev/null +++ b/arch/arm/include/asm/kvm_emulate.h @@ -0,0 +1,50 @@ +/* + * Copyright (C) 2012 - Virtual Open Systems and Columbia University + * Author: Christoffer Dall + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#ifndef __ARM_KVM_EMULATE_H__ +#define __ARM_KVM_EMULATE_H__ + +#include +#include + +u32 *vcpu_reg(struct kvm_vcpu *vcpu, u8 reg_num); +u32 *vcpu_spsr(struct kvm_vcpu *vcpu); + +static inline u32 *vcpu_pc(struct kvm_vcpu *vcpu) +{ + return (u32 *)&vcpu->arch.regs.usr_regs.ARM_pc; +} + +static inline u32 *vcpu_cpsr(struct kvm_vcpu *vcpu) +{ + return (u32 *)&vcpu->arch.regs.usr_regs.ARM_cpsr; +} + +static inline bool mode_has_spsr(struct kvm_vcpu *vcpu) +{ + unsigned long cpsr_mode = vcpu->arch.regs.usr_regs.ARM_cpsr & MODE_MASK; + return (cpsr_mode > USR_MODE && cpsr_mode < SYSTEM_MODE); +} + +static inline bool vcpu_mode_priv(struct kvm_vcpu *vcpu) +{ + unsigned long cpsr_mode = vcpu->arch.regs.usr_regs.ARM_cpsr & MODE_MASK; + return cpsr_mode > USR_MODE;; +} + +#endif /* __ARM_KVM_EMULATE_H__ */ diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h new file mode 100644 index 000000000000..0d9938a20751 --- /dev/null +++ b/arch/arm/include/asm/kvm_host.h @@ -0,0 +1,114 @@ +/* + * Copyright (C) 2012 - Virtual Open Systems and Columbia University + * Author: Christoffer Dall + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#ifndef __ARM_KVM_HOST_H__ +#define __ARM_KVM_HOST_H__ + +#include +#include + +#define KVM_MAX_VCPUS CONFIG_KVM_ARM_MAX_VCPUS +#define KVM_MEMORY_SLOTS 32 +#define KVM_PRIVATE_MEM_SLOTS 4 +#define KVM_COALESCED_MMIO_PAGE_OFFSET 1 + +#define KVM_VCPU_MAX_FEATURES 0 + +/* We don't currently support large pages. */ +#define KVM_HPAGE_GFN_SHIFT(x) 0 +#define KVM_NR_PAGE_SIZES 1 +#define KVM_PAGES_PER_HPAGE(x) (1UL<<31) + +struct kvm_vcpu; +u32 *kvm_vcpu_reg(struct kvm_vcpu *vcpu, u8 reg_num, u32 mode); +int kvm_target_cpu(void); +int kvm_reset_vcpu(struct kvm_vcpu *vcpu); +void kvm_reset_coprocs(struct kvm_vcpu *vcpu); + +struct kvm_arch { + /* VTTBR value associated with below pgd and vmid */ + u64 vttbr; + + /* + * Anything that is not used directly from assembly code goes + * here. + */ + + /* The VMID generation used for the virt. memory system */ + u64 vmid_gen; + u32 vmid; + + /* Stage-2 page table */ + pgd_t *pgd; +}; + +#define KVM_NR_MEM_OBJS 40 + +/* + * We don't want allocation failures within the mmu code, so we preallocate + * enough memory for a single page fault in a cache. + */ +struct kvm_mmu_memory_cache { + int nobjs; + void *objects[KVM_NR_MEM_OBJS]; +}; + +struct kvm_vcpu_arch { + struct kvm_regs regs; + + int target; /* Processor target */ + DECLARE_BITMAP(features, KVM_VCPU_MAX_FEATURES); + + /* System control coprocessor (cp15) */ + u32 cp15[NR_CP15_REGS]; + + /* The CPU type we expose to the VM */ + u32 midr; + + /* Exception Information */ + u32 hsr; /* Hyp Syndrome Register */ + u32 hxfar; /* Hyp Data/Inst Fault Address Register */ + u32 hpfar; /* Hyp IPA Fault Address Register */ + + /* Interrupt related fields */ + u32 irq_lines; /* IRQ and FIQ levels */ + + /* Hyp exception information */ + u32 hyp_pc; /* PC when exception was taken from Hyp mode */ + + /* Cache some mmu pages needed inside spinlock regions */ + struct kvm_mmu_memory_cache mmu_page_cache; +}; + +struct kvm_vm_stat { + u32 remote_tlb_flush; +}; + +struct kvm_vcpu_stat { + u32 halt_wakeup; +}; + +struct kvm_vcpu_init; +int kvm_vcpu_set_target(struct kvm_vcpu *vcpu, + const struct kvm_vcpu_init *init); +unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu); +int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices); +struct kvm_one_reg; +int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg); +int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg); +#endif /* __ARM_KVM_HOST_H__ */ diff --git a/arch/arm/include/uapi/asm/kvm.h b/arch/arm/include/uapi/asm/kvm.h new file mode 100644 index 000000000000..1083327b5fcd --- /dev/null +++ b/arch/arm/include/uapi/asm/kvm.h @@ -0,0 +1,106 @@ +/* + * Copyright (C) 2012 - Virtual Open Systems and Columbia University + * Author: Christoffer Dall + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#ifndef __ARM_KVM_H__ +#define __ARM_KVM_H__ + +#include +#include + +#define __KVM_HAVE_GUEST_DEBUG + +#define KVM_REG_SIZE(id) \ + (1U << (((id) & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT)) + +/* Valid for svc_regs, abt_regs, und_regs, irq_regs in struct kvm_regs */ +#define KVM_ARM_SVC_sp svc_regs[0] +#define KVM_ARM_SVC_lr svc_regs[1] +#define KVM_ARM_SVC_spsr svc_regs[2] +#define KVM_ARM_ABT_sp abt_regs[0] +#define KVM_ARM_ABT_lr abt_regs[1] +#define KVM_ARM_ABT_spsr abt_regs[2] +#define KVM_ARM_UND_sp und_regs[0] +#define KVM_ARM_UND_lr und_regs[1] +#define KVM_ARM_UND_spsr und_regs[2] +#define KVM_ARM_IRQ_sp irq_regs[0] +#define KVM_ARM_IRQ_lr irq_regs[1] +#define KVM_ARM_IRQ_spsr irq_regs[2] + +/* Valid only for fiq_regs in struct kvm_regs */ +#define KVM_ARM_FIQ_r8 fiq_regs[0] +#define KVM_ARM_FIQ_r9 fiq_regs[1] +#define KVM_ARM_FIQ_r10 fiq_regs[2] +#define KVM_ARM_FIQ_fp fiq_regs[3] +#define KVM_ARM_FIQ_ip fiq_regs[4] +#define KVM_ARM_FIQ_sp fiq_regs[5] +#define KVM_ARM_FIQ_lr fiq_regs[6] +#define KVM_ARM_FIQ_spsr fiq_regs[7] + +struct kvm_regs { + struct pt_regs usr_regs;/* R0_usr - R14_usr, PC, CPSR */ + __u32 svc_regs[3]; /* SP_svc, LR_svc, SPSR_svc */ + __u32 abt_regs[3]; /* SP_abt, LR_abt, SPSR_abt */ + __u32 und_regs[3]; /* SP_und, LR_und, SPSR_und */ + __u32 irq_regs[3]; /* SP_irq, LR_irq, SPSR_irq */ + __u32 fiq_regs[8]; /* R8_fiq - R14_fiq, SPSR_fiq */ +}; + +/* Supported Processor Types */ +#define KVM_ARM_TARGET_CORTEX_A15 0 +#define KVM_ARM_NUM_TARGETS 1 + +struct kvm_vcpu_init { + __u32 target; + __u32 features[7]; +}; + +struct kvm_sregs { +}; + +struct kvm_fpu { +}; + +struct kvm_guest_debug_arch { +}; + +struct kvm_debug_exit_arch { +}; + +struct kvm_sync_regs { +}; + +struct kvm_arch_memory_slot { +}; + +/* If you need to interpret the index values, here is the key: */ +#define KVM_REG_ARM_COPROC_MASK 0x000000000FFF0000 +#define KVM_REG_ARM_COPROC_SHIFT 16 +#define KVM_REG_ARM_32_OPC2_MASK 0x0000000000000007 +#define KVM_REG_ARM_32_OPC2_SHIFT 0 +#define KVM_REG_ARM_OPC1_MASK 0x0000000000000078 +#define KVM_REG_ARM_OPC1_SHIFT 3 +#define KVM_REG_ARM_CRM_MASK 0x0000000000000780 +#define KVM_REG_ARM_CRM_SHIFT 7 +#define KVM_REG_ARM_32_CRN_MASK 0x0000000000007800 +#define KVM_REG_ARM_32_CRN_SHIFT 11 + +/* Normal registers are mapped as coprocessor 16. */ +#define KVM_REG_ARM_CORE (0x0010 << KVM_REG_ARM_COPROC_SHIFT) +#define KVM_REG_ARM_CORE_REG(name) (offsetof(struct kvm_regs, name) / 4) + +#endif /* __ARM_KVM_H__ */ diff --git a/arch/arm/kvm/Kconfig b/arch/arm/kvm/Kconfig new file mode 100644 index 000000000000..4a01b6fbf380 --- /dev/null +++ b/arch/arm/kvm/Kconfig @@ -0,0 +1,55 @@ +# +# KVM configuration +# + +source "virt/kvm/Kconfig" + +menuconfig VIRTUALIZATION + bool "Virtualization" + ---help--- + Say Y here to get to see options for using your Linux host to run + other operating systems inside virtual machines (guests). + This option alone does not add any kernel code. + + If you say N, all options in this submenu will be skipped and + disabled. + +if VIRTUALIZATION + +config KVM + bool "Kernel-based Virtual Machine (KVM) support" + select PREEMPT_NOTIFIERS + select ANON_INODES + select KVM_MMIO + select KVM_ARM_HOST + depends on ARM_VIRT_EXT && ARM_LPAE + ---help--- + Support hosting virtualized guest machines. You will also + need to select one or more of the processor modules below. + + This module provides access to the hardware capabilities through + a character device node named /dev/kvm. + + If unsure, say N. + +config KVM_ARM_HOST + bool "KVM host support for ARM cpus." + depends on KVM + depends on MMU + ---help--- + Provides host support for ARM processors. + +config KVM_ARM_MAX_VCPUS + int "Number maximum supported virtual CPUs per VM" + depends on KVM_ARM_HOST + default 4 + help + Static number of max supported virtual CPUs per VM. + + If you choose a high number, the vcpu structures will be quite + large, so only choose a reasonable number that you expect to + actually use. + +source drivers/virtio/Kconfig + +endif # VIRTUALIZATION diff --git a/arch/arm/kvm/Makefile b/arch/arm/kvm/Makefile new file mode 100644 index 000000000000..dfc293f277b3 --- /dev/null +++ b/arch/arm/kvm/Makefile @@ -0,0 +1,21 @@ +# +# Makefile for Kernel-based Virtual Machine module +# + +plus_virt := $(call as-instr,.arch_extension virt,+virt) +ifeq ($(plus_virt),+virt) + plus_virt_def := -DREQUIRES_VIRT=1 +endif + +ccflags-y += -Ivirt/kvm -Iarch/arm/kvm +CFLAGS_arm.o := -I. $(plus_virt_def) +CFLAGS_mmu.o := -I. + +AFLAGS_init.o := -Wa,-march=armv7-a$(plus_virt) +AFLAGS_interrupts.o := -Wa,-march=armv7-a$(plus_virt) + +kvm-arm-y = $(addprefix ../../../virt/kvm/, kvm_main.o coalesced_mmio.o) + +obj-y += kvm-arm.o init.o interrupts.o +obj-y += arm.o guest.o mmu.o emulate.o reset.o +obj-y += coproc.o diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c new file mode 100644 index 000000000000..d3506b4001aa --- /dev/null +++ b/arch/arm/kvm/arm.c @@ -0,0 +1,350 @@ +/* + * Copyright (C) 2012 - Virtual Open Systems and Columbia University + * Author: Christoffer Dall + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define CREATE_TRACE_POINTS +#include "trace.h" + +#include +#include +#include +#include +#include + +#ifdef REQUIRES_VIRT +__asm__(".arch_extension virt"); +#endif + +int kvm_arch_hardware_enable(void *garbage) +{ + return 0; +} + +int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu) +{ + return kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE; +} + +void kvm_arch_hardware_disable(void *garbage) +{ +} + +int kvm_arch_hardware_setup(void) +{ + return 0; +} + +void kvm_arch_hardware_unsetup(void) +{ +} + +void kvm_arch_check_processor_compat(void *rtn) +{ + *(int *)rtn = 0; +} + +void kvm_arch_sync_events(struct kvm *kvm) +{ +} + +int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) +{ + if (type) + return -EINVAL; + + return 0; +} + +int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf) +{ + return VM_FAULT_SIGBUS; +} + +void kvm_arch_free_memslot(struct kvm_memory_slot *free, + struct kvm_memory_slot *dont) +{ +} + +int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages) +{ + return 0; +} + +void kvm_arch_destroy_vm(struct kvm *kvm) +{ + int i; + + for (i = 0; i < KVM_MAX_VCPUS; ++i) { + if (kvm->vcpus[i]) { + kvm_arch_vcpu_free(kvm->vcpus[i]); + kvm->vcpus[i] = NULL; + } + } +} + +int kvm_dev_ioctl_check_extension(long ext) +{ + int r; + switch (ext) { + case KVM_CAP_USER_MEMORY: + case KVM_CAP_SYNC_MMU: + case KVM_CAP_DESTROY_MEMORY_REGION_WORKS: + case KVM_CAP_ONE_REG: + r = 1; + break; + case KVM_CAP_COALESCED_MMIO: + r = KVM_COALESCED_MMIO_PAGE_OFFSET; + break; + case KVM_CAP_NR_VCPUS: + r = num_online_cpus(); + break; + case KVM_CAP_MAX_VCPUS: + r = KVM_MAX_VCPUS; + break; + default: + r = 0; + break; + } + return r; +} + +long kvm_arch_dev_ioctl(struct file *filp, + unsigned int ioctl, unsigned long arg) +{ + return -EINVAL; +} + +int kvm_arch_set_memory_region(struct kvm *kvm, + struct kvm_userspace_memory_region *mem, + struct kvm_memory_slot old, + int user_alloc) +{ + return 0; +} + +int kvm_arch_prepare_memory_region(struct kvm *kvm, + struct kvm_memory_slot *memslot, + struct kvm_memory_slot old, + struct kvm_userspace_memory_region *mem, + int user_alloc) +{ + return 0; +} + +void kvm_arch_commit_memory_region(struct kvm *kvm, + struct kvm_userspace_memory_region *mem, + struct kvm_memory_slot old, + int user_alloc) +{ +} + +void kvm_arch_flush_shadow_all(struct kvm *kvm) +{ +} + +void kvm_arch_flush_shadow_memslot(struct kvm *kvm, + struct kvm_memory_slot *slot) +{ +} + +struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id) +{ + int err; + struct kvm_vcpu *vcpu; + + vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL); + if (!vcpu) { + err = -ENOMEM; + goto out; + } + + err = kvm_vcpu_init(vcpu, kvm, id); + if (err) + goto free_vcpu; + + return vcpu; +free_vcpu: + kmem_cache_free(kvm_vcpu_cache, vcpu); +out: + return ERR_PTR(err); +} + +int kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu) +{ + return 0; +} + +void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu) +{ +} + +void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) +{ + kvm_arch_vcpu_free(vcpu); +} + +int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu) +{ + return 0; +} + +int __attribute_const__ kvm_target_cpu(void) +{ + unsigned long implementor = read_cpuid_implementor(); + unsigned long part_number = read_cpuid_part_number(); + + if (implementor != ARM_CPU_IMP_ARM) + return -EINVAL; + + switch (part_number) { + case ARM_CPU_PART_CORTEX_A15: + return KVM_ARM_TARGET_CORTEX_A15; + default: + return -EINVAL; + } +} + +int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) +{ + return 0; +} + +void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) +{ +} + +void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) +{ +} + +void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) +{ +} + +int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, + struct kvm_guest_debug *dbg) +{ + return -EINVAL; +} + + +int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, + struct kvm_mp_state *mp_state) +{ + return -EINVAL; +} + +int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, + struct kvm_mp_state *mp_state) +{ + return -EINVAL; +} + +int kvm_arch_vcpu_runnable(struct kvm_vcpu *v) +{ + return 0; +} + +int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) +{ + return -EINVAL; +} + +long kvm_arch_vcpu_ioctl(struct file *filp, + unsigned int ioctl, unsigned long arg) +{ + struct kvm_vcpu *vcpu = filp->private_data; + void __user *argp = (void __user *)arg; + + switch (ioctl) { + case KVM_ARM_VCPU_INIT: { + struct kvm_vcpu_init init; + + if (copy_from_user(&init, argp, sizeof(init))) + return -EFAULT; + + return kvm_vcpu_set_target(vcpu, &init); + + } + case KVM_SET_ONE_REG: + case KVM_GET_ONE_REG: { + struct kvm_one_reg reg; + if (copy_from_user(®, argp, sizeof(reg))) + return -EFAULT; + if (ioctl == KVM_SET_ONE_REG) + return kvm_arm_set_reg(vcpu, ®); + else + return kvm_arm_get_reg(vcpu, ®); + } + case KVM_GET_REG_LIST: { + struct kvm_reg_list __user *user_list = argp; + struct kvm_reg_list reg_list; + unsigned n; + + if (copy_from_user(®_list, user_list, sizeof(reg_list))) + return -EFAULT; + n = reg_list.n; + reg_list.n = kvm_arm_num_regs(vcpu); + if (copy_to_user(user_list, ®_list, sizeof(reg_list))) + return -EFAULT; + if (n < reg_list.n) + return -E2BIG; + return kvm_arm_copy_reg_indices(vcpu, user_list->reg); + } + default: + return -EINVAL; + } +} + +int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log) +{ + return -EINVAL; +} + +long kvm_arch_vm_ioctl(struct file *filp, + unsigned int ioctl, unsigned long arg) +{ + return -EINVAL; +} + +int kvm_arch_init(void *opaque) +{ + return 0; +} + +/* NOP: Compiling as a module not supported */ +void kvm_arch_exit(void) +{ +} + +static int arm_init(void) +{ + int rc = kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE); + return rc; +} + +module_init(arm_init); diff --git a/arch/arm/kvm/coproc.c b/arch/arm/kvm/coproc.c new file mode 100644 index 000000000000..0c433558591c --- /dev/null +++ b/arch/arm/kvm/coproc.c @@ -0,0 +1,23 @@ +/* + * Copyright (C) 2012 - Virtual Open Systems and Columbia University + * Authors: Rusty Russell + * Christoffer Dall + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ +#include + +void kvm_reset_coprocs(struct kvm_vcpu *vcpu) +{ +} diff --git a/arch/arm/kvm/emulate.c b/arch/arm/kvm/emulate.c new file mode 100644 index 000000000000..3eadc25e95de --- /dev/null +++ b/arch/arm/kvm/emulate.c @@ -0,0 +1,155 @@ +/* + * Copyright (C) 2012 - Virtual Open Systems and Columbia University + * Author: Christoffer Dall + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#include + +#define VCPU_NR_MODES 6 +#define VCPU_REG_OFFSET_USR 0 +#define VCPU_REG_OFFSET_FIQ 1 +#define VCPU_REG_OFFSET_IRQ 2 +#define VCPU_REG_OFFSET_SVC 3 +#define VCPU_REG_OFFSET_ABT 4 +#define VCPU_REG_OFFSET_UND 5 +#define REG_OFFSET(_reg) \ + (offsetof(struct kvm_regs, _reg) / sizeof(u32)) + +#define USR_REG_OFFSET(_num) REG_OFFSET(usr_regs.uregs[_num]) + +static const unsigned long vcpu_reg_offsets[VCPU_NR_MODES][15] = { + /* USR/SYS Registers */ + [VCPU_REG_OFFSET_USR] = { + USR_REG_OFFSET(0), USR_REG_OFFSET(1), USR_REG_OFFSET(2), + USR_REG_OFFSET(3), USR_REG_OFFSET(4), USR_REG_OFFSET(5), + USR_REG_OFFSET(6), USR_REG_OFFSET(7), USR_REG_OFFSET(8), + USR_REG_OFFSET(9), USR_REG_OFFSET(10), USR_REG_OFFSET(11), + USR_REG_OFFSET(12), USR_REG_OFFSET(13), USR_REG_OFFSET(14), + }, + + /* FIQ Registers */ + [VCPU_REG_OFFSET_FIQ] = { + USR_REG_OFFSET(0), USR_REG_OFFSET(1), USR_REG_OFFSET(2), + USR_REG_OFFSET(3), USR_REG_OFFSET(4), USR_REG_OFFSET(5), + USR_REG_OFFSET(6), USR_REG_OFFSET(7), + REG_OFFSET(fiq_regs[0]), /* r8 */ + REG_OFFSET(fiq_regs[1]), /* r9 */ + REG_OFFSET(fiq_regs[2]), /* r10 */ + REG_OFFSET(fiq_regs[3]), /* r11 */ + REG_OFFSET(fiq_regs[4]), /* r12 */ + REG_OFFSET(fiq_regs[5]), /* r13 */ + REG_OFFSET(fiq_regs[6]), /* r14 */ + }, + + /* IRQ Registers */ + [VCPU_REG_OFFSET_IRQ] = { + USR_REG_OFFSET(0), USR_REG_OFFSET(1), USR_REG_OFFSET(2), + USR_REG_OFFSET(3), USR_REG_OFFSET(4), USR_REG_OFFSET(5), + USR_REG_OFFSET(6), USR_REG_OFFSET(7), USR_REG_OFFSET(8), + USR_REG_OFFSET(9), USR_REG_OFFSET(10), USR_REG_OFFSET(11), + USR_REG_OFFSET(12), + REG_OFFSET(irq_regs[0]), /* r13 */ + REG_OFFSET(irq_regs[1]), /* r14 */ + }, + + /* SVC Registers */ + [VCPU_REG_OFFSET_SVC] = { + USR_REG_OFFSET(0), USR_REG_OFFSET(1), USR_REG_OFFSET(2), + USR_REG_OFFSET(3), USR_REG_OFFSET(4), USR_REG_OFFSET(5), + USR_REG_OFFSET(6), USR_REG_OFFSET(7), USR_REG_OFFSET(8), + USR_REG_OFFSET(9), USR_REG_OFFSET(10), USR_REG_OFFSET(11), + USR_REG_OFFSET(12), + REG_OFFSET(svc_regs[0]), /* r13 */ + REG_OFFSET(svc_regs[1]), /* r14 */ + }, + + /* ABT Registers */ + [VCPU_REG_OFFSET_ABT] = { + USR_REG_OFFSET(0), USR_REG_OFFSET(1), USR_REG_OFFSET(2), + USR_REG_OFFSET(3), USR_REG_OFFSET(4), USR_REG_OFFSET(5), + USR_REG_OFFSET(6), USR_REG_OFFSET(7), USR_REG_OFFSET(8), + USR_REG_OFFSET(9), USR_REG_OFFSET(10), USR_REG_OFFSET(11), + USR_REG_OFFSET(12), + REG_OFFSET(abt_regs[0]), /* r13 */ + REG_OFFSET(abt_regs[1]), /* r14 */ + }, + + /* UND Registers */ + [VCPU_REG_OFFSET_UND] = { + USR_REG_OFFSET(0), USR_REG_OFFSET(1), USR_REG_OFFSET(2), + USR_REG_OFFSET(3), USR_REG_OFFSET(4), USR_REG_OFFSET(5), + USR_REG_OFFSET(6), USR_REG_OFFSET(7), USR_REG_OFFSET(8), + USR_REG_OFFSET(9), USR_REG_OFFSET(10), USR_REG_OFFSET(11), + USR_REG_OFFSET(12), + REG_OFFSET(und_regs[0]), /* r13 */ + REG_OFFSET(und_regs[1]), /* r14 */ + }, +}; + +/* + * Return a pointer to the register number valid in the current mode of + * the virtual CPU. + */ +u32 *vcpu_reg(struct kvm_vcpu *vcpu, u8 reg_num) +{ + u32 *reg_array = (u32 *)&vcpu->arch.regs; + u32 mode = *vcpu_cpsr(vcpu) & MODE_MASK; + + switch (mode) { + case USR_MODE...SVC_MODE: + mode &= ~MODE32_BIT; /* 0 ... 3 */ + break; + + case ABT_MODE: + mode = VCPU_REG_OFFSET_ABT; + break; + + case UND_MODE: + mode = VCPU_REG_OFFSET_UND; + break; + + case SYSTEM_MODE: + mode = VCPU_REG_OFFSET_USR; + break; + + default: + BUG(); + } + + return reg_array + vcpu_reg_offsets[mode][reg_num]; +} + +/* + * Return the SPSR for the current mode of the virtual CPU. + */ +u32 *vcpu_spsr(struct kvm_vcpu *vcpu) +{ + u32 mode = *vcpu_cpsr(vcpu) & MODE_MASK; + switch (mode) { + case SVC_MODE: + return &vcpu->arch.regs.KVM_ARM_SVC_spsr; + case ABT_MODE: + return &vcpu->arch.regs.KVM_ARM_ABT_spsr; + case UND_MODE: + return &vcpu->arch.regs.KVM_ARM_UND_spsr; + case IRQ_MODE: + return &vcpu->arch.regs.KVM_ARM_IRQ_spsr; + case FIQ_MODE: + return &vcpu->arch.regs.KVM_ARM_FIQ_spsr; + default: + BUG(); + } +} diff --git a/arch/arm/kvm/guest.c b/arch/arm/kvm/guest.c new file mode 100644 index 000000000000..a12eb229021d --- /dev/null +++ b/arch/arm/kvm/guest.c @@ -0,0 +1,221 @@ +/* + * Copyright (C) 2012 - Virtual Open Systems and Columbia University + * Author: Christoffer Dall + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define VM_STAT(x) { #x, offsetof(struct kvm, stat.x), KVM_STAT_VM } +#define VCPU_STAT(x) { #x, offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU } + +struct kvm_stats_debugfs_item debugfs_entries[] = { + { NULL } +}; + +int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) +{ + return 0; +} + +static u64 core_reg_offset_from_id(u64 id) +{ + return id & ~(KVM_REG_ARCH_MASK | KVM_REG_SIZE_MASK | KVM_REG_ARM_CORE); +} + +static int get_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) +{ + u32 __user *uaddr = (u32 __user *)(long)reg->addr; + struct kvm_regs *regs = &vcpu->arch.regs; + u64 off; + + if (KVM_REG_SIZE(reg->id) != 4) + return -ENOENT; + + /* Our ID is an index into the kvm_regs struct. */ + off = core_reg_offset_from_id(reg->id); + if (off >= sizeof(*regs) / KVM_REG_SIZE(reg->id)) + return -ENOENT; + + return put_user(((u32 *)regs)[off], uaddr); +} + +static int set_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) +{ + u32 __user *uaddr = (u32 __user *)(long)reg->addr; + struct kvm_regs *regs = &vcpu->arch.regs; + u64 off, val; + + if (KVM_REG_SIZE(reg->id) != 4) + return -ENOENT; + + /* Our ID is an index into the kvm_regs struct. */ + off = core_reg_offset_from_id(reg->id); + if (off >= sizeof(*regs) / KVM_REG_SIZE(reg->id)) + return -ENOENT; + + if (get_user(val, uaddr) != 0) + return -EFAULT; + + if (off == KVM_REG_ARM_CORE_REG(usr_regs.ARM_cpsr)) { + unsigned long mode = val & MODE_MASK; + switch (mode) { + case USR_MODE: + case FIQ_MODE: + case IRQ_MODE: + case SVC_MODE: + case ABT_MODE: + case UND_MODE: + break; + default: + return -EINVAL; + } + } + + ((u32 *)regs)[off] = val; + return 0; +} + +int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) +{ + return -EINVAL; +} + +int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) +{ + return -EINVAL; +} + +static unsigned long num_core_regs(void) +{ + return sizeof(struct kvm_regs) / sizeof(u32); +} + +/** + * kvm_arm_num_regs - how many registers do we present via KVM_GET_ONE_REG + * + * This is for all registers. + */ +unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu) +{ + return num_core_regs(); +} + +/** + * kvm_arm_copy_reg_indices - get indices of all registers. + * + * We do core registers right here, then we apppend coproc regs. + */ +int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices) +{ + unsigned int i; + const u64 core_reg = KVM_REG_ARM | KVM_REG_SIZE_U32 | KVM_REG_ARM_CORE; + + for (i = 0; i < sizeof(struct kvm_regs)/sizeof(u32); i++) { + if (put_user(core_reg | i, uindices)) + return -EFAULT; + uindices++; + } + + return 0; +} + +int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) +{ + /* We currently use nothing arch-specific in upper 32 bits */ + if ((reg->id & ~KVM_REG_SIZE_MASK) >> 32 != KVM_REG_ARM >> 32) + return -EINVAL; + + /* Register group 16 means we want a core register. */ + if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_CORE) + return get_core_reg(vcpu, reg); + + return -EINVAL; +} + +int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) +{ + /* We currently use nothing arch-specific in upper 32 bits */ + if ((reg->id & ~KVM_REG_SIZE_MASK) >> 32 != KVM_REG_ARM >> 32) + return -EINVAL; + + /* Register group 16 means we set a core register. */ + if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_CORE) + return set_core_reg(vcpu, reg); + + return -EINVAL; +} + +int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, + struct kvm_sregs *sregs) +{ + return -EINVAL; +} + +int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, + struct kvm_sregs *sregs) +{ + return -EINVAL; +} + +int kvm_vcpu_set_target(struct kvm_vcpu *vcpu, + const struct kvm_vcpu_init *init) +{ + unsigned int i; + + /* We can only do a cortex A15 for now. */ + if (init->target != kvm_target_cpu()) + return -EINVAL; + + vcpu->arch.target = init->target; + bitmap_zero(vcpu->arch.features, KVM_VCPU_MAX_FEATURES); + + /* -ENOENT for unknown features, -EINVAL for invalid combinations. */ + for (i = 0; i < sizeof(init->features) * 8; i++) { + if (test_bit(i, (void *)init->features)) { + if (i >= KVM_VCPU_MAX_FEATURES) + return -ENOENT; + set_bit(i, vcpu->arch.features); + } + } + + /* Now we know what it is, we can reset it. */ + return kvm_reset_vcpu(vcpu); +} + +int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) +{ + return -EINVAL; +} + +int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) +{ + return -EINVAL; +} + +int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, + struct kvm_translation *tr) +{ + return -EINVAL; +} diff --git a/arch/arm/kvm/init.S b/arch/arm/kvm/init.S new file mode 100644 index 000000000000..1dc8926e26d2 --- /dev/null +++ b/arch/arm/kvm/init.S @@ -0,0 +1,19 @@ +/* + * Copyright (C) 2012 - Virtual Open Systems and Columbia University + * Author: Christoffer Dall + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ +#include +#include diff --git a/arch/arm/kvm/interrupts.S b/arch/arm/kvm/interrupts.S new file mode 100644 index 000000000000..1dc8926e26d2 --- /dev/null +++ b/arch/arm/kvm/interrupts.S @@ -0,0 +1,19 @@ +/* + * Copyright (C) 2012 - Virtual Open Systems and Columbia University + * Author: Christoffer Dall + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ +#include +#include diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c new file mode 100644 index 000000000000..10ed4643269f --- /dev/null +++ b/arch/arm/kvm/mmu.c @@ -0,0 +1,17 @@ +/* + * Copyright (C) 2012 - Virtual Open Systems and Columbia University + * Author: Christoffer Dall + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ diff --git a/arch/arm/kvm/reset.c b/arch/arm/kvm/reset.c new file mode 100644 index 000000000000..b80256b554cd --- /dev/null +++ b/arch/arm/kvm/reset.c @@ -0,0 +1,74 @@ +/* + * Copyright (C) 2012 - Virtual Open Systems and Columbia University + * Author: Christoffer Dall + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +/****************************************************************************** + * Cortex-A15 Reset Values + */ + +static const int a15_max_cpu_idx = 3; + +static struct kvm_regs a15_regs_reset = { + .usr_regs.ARM_cpsr = SVC_MODE | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT, +}; + + +/******************************************************************************* + * Exported reset function + */ + +/** + * kvm_reset_vcpu - sets core registers and cp15 registers to reset value + * @vcpu: The VCPU pointer + * + * This function finds the right table above and sets the registers on the + * virtual CPU struct to their architectually defined reset values. + */ +int kvm_reset_vcpu(struct kvm_vcpu *vcpu) +{ + struct kvm_regs *cpu_reset; + + switch (vcpu->arch.target) { + case KVM_ARM_TARGET_CORTEX_A15: + if (vcpu->vcpu_id > a15_max_cpu_idx) + return -EINVAL; + cpu_reset = &a15_regs_reset; + vcpu->arch.midr = read_cpuid_id(); + break; + default: + return -ENODEV; + } + + /* Reset core registers */ + memcpy(&vcpu->arch.regs, cpu_reset, sizeof(vcpu->arch.regs)); + + /* Reset CP15 registers */ + kvm_reset_coprocs(vcpu); + + return 0; +} diff --git a/arch/arm/kvm/trace.h b/arch/arm/kvm/trace.h new file mode 100644 index 000000000000..f8869c19c0a3 --- /dev/null +++ b/arch/arm/kvm/trace.h @@ -0,0 +1,52 @@ +#if !defined(_TRACE_KVM_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_KVM_H + +#include + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM kvm + +/* + * Tracepoints for entry/exit to guest + */ +TRACE_EVENT(kvm_entry, + TP_PROTO(unsigned long vcpu_pc), + TP_ARGS(vcpu_pc), + + TP_STRUCT__entry( + __field( unsigned long, vcpu_pc ) + ), + + TP_fast_assign( + __entry->vcpu_pc = vcpu_pc; + ), + + TP_printk("PC: 0x%08lx", __entry->vcpu_pc) +); + +TRACE_EVENT(kvm_exit, + TP_PROTO(unsigned long vcpu_pc), + TP_ARGS(vcpu_pc), + + TP_STRUCT__entry( + __field( unsigned long, vcpu_pc ) + ), + + TP_fast_assign( + __entry->vcpu_pc = vcpu_pc; + ), + + TP_printk("PC: 0x%08lx", __entry->vcpu_pc) +); + + + +#endif /* _TRACE_KVM_H */ + +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH arch/arm/kvm +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_FILE trace + +/* This part must be outside protection */ +#include diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index e6e5d4b13708..24978d525c6e 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -764,6 +764,11 @@ struct kvm_dirty_tlb { #define KVM_REG_SIZE_U512 0x0060000000000000ULL #define KVM_REG_SIZE_U1024 0x0070000000000000ULL +struct kvm_reg_list { + __u64 n; /* number of regs */ + __u64 reg[0]; +}; + struct kvm_one_reg { __u64 id; __u64 addr; @@ -932,6 +937,8 @@ struct kvm_s390_ucas_mapping { #define KVM_SET_ONE_REG _IOW(KVMIO, 0xac, struct kvm_one_reg) /* VM is being stopped by host */ #define KVM_KVMCLOCK_CTRL _IO(KVMIO, 0xad) +#define KVM_ARM_VCPU_INIT _IOW(KVMIO, 0xae, struct kvm_vcpu_init) +#define KVM_GET_REG_LIST _IOWR(KVMIO, 0xb0, struct kvm_reg_list) #define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0) #define KVM_DEV_ASSIGN_PCI_2_3 (1 << 1) -- cgit v1.2.3 From 86ce85352f0da7e1431ad8efcb04323819a620e7 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Sun, 20 Jan 2013 18:28:08 -0500 Subject: KVM: ARM: Inject IRQs and FIQs from userspace MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit All interrupt injection is now based on the VM ioctl KVM_IRQ_LINE. This works semantically well for the GIC as we in fact raise/lower a line on a machine component (the gic). The IOCTL uses the follwing struct. struct kvm_irq_level { union { __u32 irq; /* GSI */ __s32 status; /* not used for KVM_IRQ_LEVEL */ }; __u32 level; /* 0 or 1 */ }; ARM can signal an interrupt either at the CPU level, or at the in-kernel irqchip (GIC), and for in-kernel irqchip can tell the GIC to use PPIs designated for specific cpus. The irq field is interpreted like this:  bits: | 31 ... 24 | 23 ... 16 | 15 ... 0 | field: | irq_type | vcpu_index | irq_number | The irq_type field has the following values: - irq_type[0]: out-of-kernel GIC: irq_number 0 is IRQ, irq_number 1 is FIQ - irq_type[1]: in-kernel GIC: SPI, irq_number between 32 and 1019 (incl.) (the vcpu_index field is ignored) - irq_type[2]: in-kernel GIC: PPI, irq_number between 16 and 31 (incl.) The irq_number thus corresponds to the irq ID in as in the GICv2 specs. This is documented in Documentation/kvm/api.txt. Reviewed-by: Will Deacon Reviewed-by: Marcelo Tosatti Signed-off-by: Christoffer Dall --- Documentation/virtual/kvm/api.txt | 25 ++++++++++++--- arch/arm/include/asm/kvm_arm.h | 1 + arch/arm/include/uapi/asm/kvm.h | 21 +++++++++++++ arch/arm/kvm/arm.c | 65 +++++++++++++++++++++++++++++++++++++++ arch/arm/kvm/trace.h | 25 +++++++++++++++ include/uapi/linux/kvm.h | 1 + 6 files changed, 134 insertions(+), 4 deletions(-) (limited to 'include/uapi/linux') diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 4237c27ea612..505049299298 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -615,15 +615,32 @@ created. 4.25 KVM_IRQ_LINE Capability: KVM_CAP_IRQCHIP -Architectures: x86, ia64 +Architectures: x86, ia64, arm Type: vm ioctl Parameters: struct kvm_irq_level Returns: 0 on success, -1 on error Sets the level of a GSI input to the interrupt controller model in the kernel. -Requires that an interrupt controller model has been previously created with -KVM_CREATE_IRQCHIP. Note that edge-triggered interrupts require the level -to be set to 1 and then back to 0. +On some architectures it is required that an interrupt controller model has +been previously created with KVM_CREATE_IRQCHIP. Note that edge-triggered +interrupts require the level to be set to 1 and then back to 0. + +ARM can signal an interrupt either at the CPU level, or at the in-kernel irqchip +(GIC), and for in-kernel irqchip can tell the GIC to use PPIs designated for +specific cpus. The irq field is interpreted like this: + +  bits: | 31 ... 24 | 23 ... 16 | 15 ... 0 | + field: | irq_type | vcpu_index | irq_id | + +The irq_type field has the following values: +- irq_type[0]: out-of-kernel GIC: irq_id 0 is IRQ, irq_id 1 is FIQ +- irq_type[1]: in-kernel GIC: SPI, irq_id between 32 and 1019 (incl.) + (the vcpu_index field is ignored) +- irq_type[2]: in-kernel GIC: PPI, irq_id between 16 and 31 (incl.) + +(The irq_id field thus corresponds nicely to the IRQ ID in the ARM GIC specs) + +In both cases, level is used to raise/lower the line. struct kvm_irq_level { union { diff --git a/arch/arm/include/asm/kvm_arm.h b/arch/arm/include/asm/kvm_arm.h index 8875b3f605a7..d64b5250ad4e 100644 --- a/arch/arm/include/asm/kvm_arm.h +++ b/arch/arm/include/asm/kvm_arm.h @@ -68,6 +68,7 @@ #define HCR_GUEST_MASK (HCR_TSC | HCR_TSW | HCR_TWI | HCR_VM | HCR_BSU_IS | \ HCR_FB | HCR_TAC | HCR_AMO | HCR_IMO | HCR_FMO | \ HCR_SWIO | HCR_TIDCP) +#define HCR_VIRT_EXCP_MASK (HCR_VA | HCR_VI | HCR_VF) /* Hyp System Control Register (HSCTLR) bits */ #define HSCTLR_TE (1 << 30) diff --git a/arch/arm/include/uapi/asm/kvm.h b/arch/arm/include/uapi/asm/kvm.h index 1083327b5fcd..53f45f146875 100644 --- a/arch/arm/include/uapi/asm/kvm.h +++ b/arch/arm/include/uapi/asm/kvm.h @@ -23,6 +23,7 @@ #include #define __KVM_HAVE_GUEST_DEBUG +#define __KVM_HAVE_IRQ_LINE #define KVM_REG_SIZE(id) \ (1U << (((id) & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT)) @@ -103,4 +104,24 @@ struct kvm_arch_memory_slot { #define KVM_REG_ARM_CORE (0x0010 << KVM_REG_ARM_COPROC_SHIFT) #define KVM_REG_ARM_CORE_REG(name) (offsetof(struct kvm_regs, name) / 4) +/* KVM_IRQ_LINE irq field index values */ +#define KVM_ARM_IRQ_TYPE_SHIFT 24 +#define KVM_ARM_IRQ_TYPE_MASK 0xff +#define KVM_ARM_IRQ_VCPU_SHIFT 16 +#define KVM_ARM_IRQ_VCPU_MASK 0xff +#define KVM_ARM_IRQ_NUM_SHIFT 0 +#define KVM_ARM_IRQ_NUM_MASK 0xffff + +/* irq_type field */ +#define KVM_ARM_IRQ_TYPE_CPU 0 +#define KVM_ARM_IRQ_TYPE_SPI 1 +#define KVM_ARM_IRQ_TYPE_PPI 2 + +/* out-of-kernel GIC cpu interrupt injection irq_number field */ +#define KVM_ARM_IRQ_CPU_IRQ 0 +#define KVM_ARM_IRQ_CPU_FIQ 1 + +/* Highest supported SPI, from VGIC_NR_IRQS */ +#define KVM_ARM_IRQ_GIC_MAX 127 + #endif /* __ARM_KVM_H__ */ diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index d810afb6cb84..2101152c3a4b 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #define CREATE_TRACE_POINTS @@ -284,6 +285,7 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { + vcpu->cpu = cpu; } void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) @@ -319,6 +321,69 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) return -EINVAL; } +static int vcpu_interrupt_line(struct kvm_vcpu *vcpu, int number, bool level) +{ + int bit_index; + bool set; + unsigned long *ptr; + + if (number == KVM_ARM_IRQ_CPU_IRQ) + bit_index = __ffs(HCR_VI); + else /* KVM_ARM_IRQ_CPU_FIQ */ + bit_index = __ffs(HCR_VF); + + ptr = (unsigned long *)&vcpu->arch.irq_lines; + if (level) + set = test_and_set_bit(bit_index, ptr); + else + set = test_and_clear_bit(bit_index, ptr); + + /* + * If we didn't change anything, no need to wake up or kick other CPUs + */ + if (set == level) + return 0; + + /* + * The vcpu irq_lines field was updated, wake up sleeping VCPUs and + * trigger a world-switch round on the running physical CPU to set the + * virtual IRQ/FIQ fields in the HCR appropriately. + */ + kvm_vcpu_kick(vcpu); + + return 0; +} + +int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level) +{ + u32 irq = irq_level->irq; + unsigned int irq_type, vcpu_idx, irq_num; + int nrcpus = atomic_read(&kvm->online_vcpus); + struct kvm_vcpu *vcpu = NULL; + bool level = irq_level->level; + + irq_type = (irq >> KVM_ARM_IRQ_TYPE_SHIFT) & KVM_ARM_IRQ_TYPE_MASK; + vcpu_idx = (irq >> KVM_ARM_IRQ_VCPU_SHIFT) & KVM_ARM_IRQ_VCPU_MASK; + irq_num = (irq >> KVM_ARM_IRQ_NUM_SHIFT) & KVM_ARM_IRQ_NUM_MASK; + + trace_kvm_irq_line(irq_type, vcpu_idx, irq_num, irq_level->level); + + if (irq_type != KVM_ARM_IRQ_TYPE_CPU) + return -EINVAL; + + if (vcpu_idx >= nrcpus) + return -EINVAL; + + vcpu = kvm_get_vcpu(kvm, vcpu_idx); + if (!vcpu) + return -EINVAL; + + if (irq_num > KVM_ARM_IRQ_CPU_FIQ) + return -EINVAL; + + return vcpu_interrupt_line(vcpu, irq_num, level); +} + long kvm_arch_vcpu_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { diff --git a/arch/arm/kvm/trace.h b/arch/arm/kvm/trace.h index 862b2cc12fbe..105d1f79909a 100644 --- a/arch/arm/kvm/trace.h +++ b/arch/arm/kvm/trace.h @@ -39,6 +39,31 @@ TRACE_EVENT(kvm_exit, TP_printk("PC: 0x%08lx", __entry->vcpu_pc) ); +TRACE_EVENT(kvm_irq_line, + TP_PROTO(unsigned int type, int vcpu_idx, int irq_num, int level), + TP_ARGS(type, vcpu_idx, irq_num, level), + + TP_STRUCT__entry( + __field( unsigned int, type ) + __field( int, vcpu_idx ) + __field( int, irq_num ) + __field( int, level ) + ), + + TP_fast_assign( + __entry->type = type; + __entry->vcpu_idx = vcpu_idx; + __entry->irq_num = irq_num; + __entry->level = level; + ), + + TP_printk("Inject %s interrupt (%d), vcpu->idx: %d, num: %d, level: %d", + (__entry->type == KVM_ARM_IRQ_TYPE_CPU) ? "CPU" : + (__entry->type == KVM_ARM_IRQ_TYPE_PPI) ? "VGIC PPI" : + (__entry->type == KVM_ARM_IRQ_TYPE_SPI) ? "VGIC SPI" : "UNKNOWN", + __entry->type, __entry->vcpu_idx, __entry->irq_num, __entry->level) +); + TRACE_EVENT(kvm_unmap_hva, TP_PROTO(unsigned long hva), TP_ARGS(hva), diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 24978d525c6e..dc63665e73ad 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -115,6 +115,7 @@ struct kvm_irq_level { * ACPI gsi notion of irq. * For IA-64 (APIC model) IOAPIC0: irq 0-23; IOAPIC1: irq 24-47.. * For X86 (standard AT mode) PIC0/1: irq 0-15. IOAPIC0: 0-23.. + * For ARM: See Documentation/virtual/kvm/api.txt */ union { __u32 irq; -- cgit v1.2.3 From aa024c2f35a07cc32e48c5f62a5807be01c09249 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sun, 20 Jan 2013 18:28:13 -0500 Subject: KVM: ARM: Power State Coordination Interface implementation Implement the PSCI specification (ARM DEN 0022A) to control virtual CPUs being "powered" on or off. PSCI/KVM is detected using the KVM_CAP_ARM_PSCI capability. A virtual CPU can now be initialized in a "powered off" state, using the KVM_ARM_VCPU_POWER_OFF feature flag. The guest can use either SMC or HVC to execute a PSCI function. Reviewed-by: Will Deacon Signed-off-by: Marc Zyngier Signed-off-by: Christoffer Dall --- Documentation/virtual/kvm/api.txt | 4 ++ arch/arm/include/asm/kvm_emulate.h | 10 ++++ arch/arm/include/asm/kvm_host.h | 5 +- arch/arm/include/asm/kvm_psci.h | 23 ++++++++ arch/arm/include/uapi/asm/kvm.h | 16 ++++++ arch/arm/kvm/Makefile | 2 +- arch/arm/kvm/arm.c | 30 ++++++++++- arch/arm/kvm/psci.c | 108 +++++++++++++++++++++++++++++++++++++ include/uapi/linux/kvm.h | 1 + 9 files changed, 195 insertions(+), 4 deletions(-) create mode 100644 arch/arm/include/asm/kvm_psci.h create mode 100644 arch/arm/kvm/psci.c (limited to 'include/uapi/linux') diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 38066a7a74e1..c25439a58274 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -2185,6 +2185,10 @@ return ENOEXEC for that vcpu. Note that because some registers reflect machine topology, all vcpus should be created before this ioctl is invoked. +Possible features: + - KVM_ARM_VCPU_POWER_OFF: Starts the CPU in a power-off state. + Depends on KVM_CAP_ARM_PSCI. + 4.78 KVM_GET_REG_LIST diff --git a/arch/arm/include/asm/kvm_emulate.h b/arch/arm/include/asm/kvm_emulate.h index 4c1a073280be..fd611996bfb5 100644 --- a/arch/arm/include/asm/kvm_emulate.h +++ b/arch/arm/include/asm/kvm_emulate.h @@ -32,6 +32,11 @@ void kvm_inject_undefined(struct kvm_vcpu *vcpu); void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr); void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr); +static inline bool vcpu_mode_is_32bit(struct kvm_vcpu *vcpu) +{ + return 1; +} + static inline u32 *vcpu_pc(struct kvm_vcpu *vcpu) { return (u32 *)&vcpu->arch.regs.usr_regs.ARM_pc; @@ -42,6 +47,11 @@ static inline u32 *vcpu_cpsr(struct kvm_vcpu *vcpu) return (u32 *)&vcpu->arch.regs.usr_regs.ARM_cpsr; } +static inline void vcpu_set_thumb(struct kvm_vcpu *vcpu) +{ + *vcpu_cpsr(vcpu) |= PSR_T_BIT; +} + static inline bool mode_has_spsr(struct kvm_vcpu *vcpu) { unsigned long cpsr_mode = vcpu->arch.regs.usr_regs.ARM_cpsr & MODE_MASK; diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index e65fc967a71d..98b4d1a72923 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -30,7 +30,7 @@ #define KVM_COALESCED_MMIO_PAGE_OFFSET 1 #define KVM_HAVE_ONE_REG -#define KVM_VCPU_MAX_FEATURES 0 +#define KVM_VCPU_MAX_FEATURES 1 /* We don't currently support large pages. */ #define KVM_HPAGE_GFN_SHIFT(x) 0 @@ -100,6 +100,9 @@ struct kvm_vcpu_arch { int last_pcpu; cpumask_t require_dcache_flush; + /* Don't run the guest on this vcpu */ + bool pause; + /* IO related fields */ struct kvm_decode mmio_decode; diff --git a/arch/arm/include/asm/kvm_psci.h b/arch/arm/include/asm/kvm_psci.h new file mode 100644 index 000000000000..9a83d98bf170 --- /dev/null +++ b/arch/arm/include/asm/kvm_psci.h @@ -0,0 +1,23 @@ +/* + * Copyright (C) 2012 - ARM Ltd + * Author: Marc Zyngier + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef __ARM_KVM_PSCI_H__ +#define __ARM_KVM_PSCI_H__ + +bool kvm_psci_call(struct kvm_vcpu *vcpu); + +#endif /* __ARM_KVM_PSCI_H__ */ diff --git a/arch/arm/include/uapi/asm/kvm.h b/arch/arm/include/uapi/asm/kvm.h index bbb6b2328004..3303ff5adbf3 100644 --- a/arch/arm/include/uapi/asm/kvm.h +++ b/arch/arm/include/uapi/asm/kvm.h @@ -65,6 +65,8 @@ struct kvm_regs { #define KVM_ARM_TARGET_CORTEX_A15 0 #define KVM_ARM_NUM_TARGETS 1 +#define KVM_ARM_VCPU_POWER_OFF 0 /* CPU is started in OFF state */ + struct kvm_vcpu_init { __u32 target; __u32 features[7]; @@ -145,4 +147,18 @@ struct kvm_arch_memory_slot { /* Highest supported SPI, from VGIC_NR_IRQS */ #define KVM_ARM_IRQ_GIC_MAX 127 +/* PSCI interface */ +#define KVM_PSCI_FN_BASE 0x95c1ba5e +#define KVM_PSCI_FN(n) (KVM_PSCI_FN_BASE + (n)) + +#define KVM_PSCI_FN_CPU_SUSPEND KVM_PSCI_FN(0) +#define KVM_PSCI_FN_CPU_OFF KVM_PSCI_FN(1) +#define KVM_PSCI_FN_CPU_ON KVM_PSCI_FN(2) +#define KVM_PSCI_FN_MIGRATE KVM_PSCI_FN(3) + +#define KVM_PSCI_RET_SUCCESS 0 +#define KVM_PSCI_RET_NI ((unsigned long)-1) +#define KVM_PSCI_RET_INVAL ((unsigned long)-2) +#define KVM_PSCI_RET_DENIED ((unsigned long)-3) + #endif /* __ARM_KVM_H__ */ diff --git a/arch/arm/kvm/Makefile b/arch/arm/kvm/Makefile index 1e45cd97a7fc..ea27987bd07f 100644 --- a/arch/arm/kvm/Makefile +++ b/arch/arm/kvm/Makefile @@ -18,4 +18,4 @@ kvm-arm-y = $(addprefix ../../../virt/kvm/, kvm_main.o coalesced_mmio.o) obj-y += kvm-arm.o init.o interrupts.o obj-y += arm.o guest.o mmu.o emulate.o reset.o -obj-y += coproc.o coproc_a15.o mmio.o +obj-y += coproc.o coproc_a15.o mmio.o psci.o diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index 8680b9ffd2ae..2d30e3afdaf9 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -43,6 +43,7 @@ #include #include #include +#include #include #ifdef REQUIRES_VIRT @@ -160,6 +161,7 @@ int kvm_dev_ioctl_check_extension(long ext) case KVM_CAP_SYNC_MMU: case KVM_CAP_DESTROY_MEMORY_REGION_WORKS: case KVM_CAP_ONE_REG: + case KVM_CAP_ARM_PSCI: r = 1; break; case KVM_CAP_COALESCED_MMIO: @@ -443,14 +445,18 @@ static int handle_hvc(struct kvm_vcpu *vcpu, struct kvm_run *run) trace_kvm_hvc(*vcpu_pc(vcpu), *vcpu_reg(vcpu, 0), vcpu->arch.hsr & HSR_HVC_IMM_MASK); + if (kvm_psci_call(vcpu)) + return 1; + kvm_inject_undefined(vcpu); return 1; } static int handle_smc(struct kvm_vcpu *vcpu, struct kvm_run *run) { - /* We don't support SMC; don't do that. */ - kvm_debug("smc: at %08x", *vcpu_pc(vcpu)); + if (kvm_psci_call(vcpu)) + return 1; + kvm_inject_undefined(vcpu); return 1; } @@ -589,9 +595,26 @@ static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu) return 0; vcpu->arch.has_run_once = true; + + /* + * Handle the "start in power-off" case by calling into the + * PSCI code. + */ + if (test_and_clear_bit(KVM_ARM_VCPU_POWER_OFF, vcpu->arch.features)) { + *vcpu_reg(vcpu, 0) = KVM_PSCI_FN_CPU_OFF; + kvm_psci_call(vcpu); + } + return 0; } +static void vcpu_pause(struct kvm_vcpu *vcpu) +{ + wait_queue_head_t *wq = kvm_arch_vcpu_wq(vcpu); + + wait_event_interruptible(*wq, !vcpu->arch.pause); +} + /** * kvm_arch_vcpu_ioctl_run - the main VCPU run function to execute guest code * @vcpu: The VCPU pointer @@ -635,6 +658,9 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) update_vttbr(vcpu->kvm); + if (vcpu->arch.pause) + vcpu_pause(vcpu); + local_irq_disable(); /* diff --git a/arch/arm/kvm/psci.c b/arch/arm/kvm/psci.c new file mode 100644 index 000000000000..7ee5bb7a3667 --- /dev/null +++ b/arch/arm/kvm/psci.c @@ -0,0 +1,108 @@ +/* + * Copyright (C) 2012 - ARM Ltd + * Author: Marc Zyngier + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include +#include + +#include +#include + +/* + * This is an implementation of the Power State Coordination Interface + * as described in ARM document number ARM DEN 0022A. + */ + +static void kvm_psci_vcpu_off(struct kvm_vcpu *vcpu) +{ + vcpu->arch.pause = true; +} + +static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu) +{ + struct kvm *kvm = source_vcpu->kvm; + struct kvm_vcpu *vcpu; + wait_queue_head_t *wq; + unsigned long cpu_id; + phys_addr_t target_pc; + + cpu_id = *vcpu_reg(source_vcpu, 1); + if (vcpu_mode_is_32bit(source_vcpu)) + cpu_id &= ~((u32) 0); + + if (cpu_id >= atomic_read(&kvm->online_vcpus)) + return KVM_PSCI_RET_INVAL; + + target_pc = *vcpu_reg(source_vcpu, 2); + + vcpu = kvm_get_vcpu(kvm, cpu_id); + + wq = kvm_arch_vcpu_wq(vcpu); + if (!waitqueue_active(wq)) + return KVM_PSCI_RET_INVAL; + + kvm_reset_vcpu(vcpu); + + /* Gracefully handle Thumb2 entry point */ + if (vcpu_mode_is_32bit(vcpu) && (target_pc & 1)) { + target_pc &= ~((phys_addr_t) 1); + vcpu_set_thumb(vcpu); + } + + *vcpu_pc(vcpu) = target_pc; + vcpu->arch.pause = false; + smp_mb(); /* Make sure the above is visible */ + + wake_up_interruptible(wq); + + return KVM_PSCI_RET_SUCCESS; +} + +/** + * kvm_psci_call - handle PSCI call if r0 value is in range + * @vcpu: Pointer to the VCPU struct + * + * Handle PSCI calls from guests through traps from HVC or SMC instructions. + * The calling convention is similar to SMC calls to the secure world where + * the function number is placed in r0 and this function returns true if the + * function number specified in r0 is withing the PSCI range, and false + * otherwise. + */ +bool kvm_psci_call(struct kvm_vcpu *vcpu) +{ + unsigned long psci_fn = *vcpu_reg(vcpu, 0) & ~((u32) 0); + unsigned long val; + + switch (psci_fn) { + case KVM_PSCI_FN_CPU_OFF: + kvm_psci_vcpu_off(vcpu); + val = KVM_PSCI_RET_SUCCESS; + break; + case KVM_PSCI_FN_CPU_ON: + val = kvm_psci_vcpu_on(vcpu); + break; + case KVM_PSCI_FN_CPU_SUSPEND: + case KVM_PSCI_FN_MIGRATE: + val = KVM_PSCI_RET_NI; + break; + + default: + return false; + } + + *vcpu_reg(vcpu, 0) = val; + return true; +} diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index dc63665e73ad..7f2360a46fc2 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -636,6 +636,7 @@ struct kvm_ppc_smmu_info { #define KVM_CAP_IRQFD_RESAMPLE 82 #define KVM_CAP_PPC_BOOKE_WATCHDOG 83 #define KVM_CAP_PPC_HTAB_FD 84 +#define KVM_CAP_ARM_PSCI 87 #ifdef KVM_CAP_IRQ_ROUTING -- cgit v1.2.3 From 9569793a79836320c33d400c686dcb78f886bdad Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Sun, 6 Jan 2013 12:22:06 -0300 Subject: [media] dvb: Add DVBv5 statistics properties The DVBv3 statistics parameters are limited on several ways: - It doesn't provide any way to indicate the used measure, so userspace need to guess how to calculate/use it; - Only a limited set of stats are supported; - Can't be called in a way to require them to be filled all at once (atomic reads from the hardware), with may cause troubles on interpreting them on userspace; - On some OFDM delivery systems, the carriers can be independently modulated, having different properties. Currently, there's no way to report per-layer stats. To address the above issues, adding a new DVBv5-based stats API. While here, correct inner code nomenclature on a few places. Reviewed-by: Antti Palosaari Signed-off-by: Mauro Carvalho Chehab --- Documentation/DocBook/media/dvb/dvbapi.xml | 2 +- Documentation/DocBook/media/dvb/dvbproperty.xml | 180 +++++++++++++++++++++++- Documentation/DocBook/media/dvb/frontend.xml | 2 +- include/uapi/linux/dvb/frontend.h | 79 ++++++++++- include/uapi/linux/dvb/version.h | 2 +- 5 files changed, 260 insertions(+), 5 deletions(-) (limited to 'include/uapi/linux') diff --git a/Documentation/DocBook/media/dvb/dvbapi.xml b/Documentation/DocBook/media/dvb/dvbapi.xml index 757488b24f4f..0197bcc7842d 100644 --- a/Documentation/DocBook/media/dvb/dvbapi.xml +++ b/Documentation/DocBook/media/dvb/dvbapi.xml @@ -84,7 +84,7 @@ Added ISDB-T test originally written by Patrick Boettcher LINUX DVB API -Version 5.8 +Version 5.10 &sub-intro; diff --git a/Documentation/DocBook/media/dvb/dvbproperty.xml b/Documentation/DocBook/media/dvb/dvbproperty.xml index 957e3acaae8e..4a5eaeed0b9e 100644 --- a/Documentation/DocBook/media/dvb/dvbproperty.xml +++ b/Documentation/DocBook/media/dvb/dvbproperty.xml @@ -7,14 +7,41 @@ the capability ioctls weren't implemented yet via the new way. The typical usage for the FE_GET_PROPERTY/FE_SET_PROPERTY API is to replace the ioctl's were the struct dvb_frontend_parameters were used. +
+DTV stats type + +struct dtv_stats { + __u8 scale; /* enum fecap_scale_params type */ + union { + __u64 uvalue; /* for counters and relative scales */ + __s64 svalue; /* for 1/1000 dB measures */ + }; +} __packed; + +
+
+DTV stats type + +#define MAX_DTV_STATS 4 + +struct dtv_fe_stats { + __u8 len; + struct dtv_stats stat[MAX_DTV_STATS]; +} __packed; + +
+
DTV property type /* Reserved fields should be set to 0 */ + struct dtv_property { __u32 cmd; + __u32 reserved[3]; union { __u32 data; + struct dtv_fe_stats st; struct { __u8 data[32]; __u32 len; @@ -440,7 +467,7 @@ typedef enum fe_delivery_system { <constant>DTV-ISDBT-LAYER*</constant> parameters ISDB-T channels can be coded hierarchically. As opposed to DVB-T in ISDB-T hierarchical layers can be decoded simultaneously. For that - reason a ISDB-T demodulator has 3 viterbi and 3 reed-solomon-decoders. + reason a ISDB-T demodulator has 3 Viterbi and 3 Reed-Solomon decoders. ISDB-T has 3 hierarchical layers which each can use a part of the available segments. The total number of segments over all layers has to 13 in ISDB-T. @@ -850,6 +877,147 @@ enum fe_interleaving { use the special macro LNA_AUTO to set LNA auto
+ +
+ Frontend statistics indicators + The values are returned via dtv_property.stat. + If the property is supported, dtv_property.stat.len is bigger than zero. + For most delivery systems, dtv_property.stat.len + will be 1 if the stats is supported, and the properties will + return a single value for each parameter. + It should be noticed, however, that new OFDM delivery systems + like ISDB can use different modulation types for each group of + carriers. On such standards, up to 3 groups of statistics can be + provided, and dtv_property.stat.len is updated + to reflect the "global" metrics, plus one metric per each carrier + group (called "layer" on ISDB). + So, in order to be consistent with other delivery systems, the first + value at dtv_property.stat.dtv_stats + array refers to the global metric. The other elements of the array + represent each layer, starting from layer A(index 1), + layer B (index 2) and so on. + The number of filled elements are stored at dtv_property.stat.len. + Each element of the dtv_property.stat.dtv_stats array consists on two elements: + + svalue or uvalue, where + svalue is for signed values of the measure (dB measures) + and uvalue is for unsigned values (counters, relative scale) + scale - Scale for the value. It can be: +
+ + FE_SCALE_NOT_AVAILABLE - The parameter is supported by the frontend, but it was not possible to collect it (could be a transitory or permanent condition) + FE_SCALE_DECIBEL - parameter is a signed value, measured in 1/1000 dB + FE_SCALE_RELATIVE - parameter is a unsigned value, where 0 means 0% and 65535 means 100%. + FE_SCALE_COUNTER - parameter is a unsigned value that counts the occurrence of an event, like bit error, block error, or lapsed time. + +
+
+
+
+ <constant>DTV_STAT_SIGNAL_STRENGTH</constant> + Indicates the signal strength level at the analog part of the tuner or of the demod. + Possible scales for this metric are: + + FE_SCALE_NOT_AVAILABLE - it failed to measure it, or the measurement was not complete yet. + FE_SCALE_DECIBEL - signal strength is in 0.0001 dBm units, power measured in miliwatts. This value is generally negative. + FE_SCALE_RELATIVE - The frontend provides a 0% to 100% measurement for power (actually, 0 to 65535). + +
+
+ <constant>DTV_STAT_CNR</constant> + Indicates the Signal to Noise ratio for the main carrier. + Possible scales for this metric are: + + FE_SCALE_NOT_AVAILABLE - it failed to measure it, or the measurement was not complete yet. + FE_SCALE_DECIBEL - Signal/Noise ratio is in 0.0001 dB units. + FE_SCALE_RELATIVE - The frontend provides a 0% to 100% measurement for Signal/Noise (actually, 0 to 65535). + +
+
+ <constant>DTV_STAT_PRE_ERROR_BIT_COUNT</constant> + Measures the number of bit errors before the forward error correction (FEC) on the inner coding block (before Viterbi, LDPC or other inner code). + This measure is taken during the same interval as DTV_STAT_PRE_TOTAL_BIT_COUNT. + In order to get the BER (Bit Error Rate) measurement, it should be divided by + DTV_STAT_PRE_TOTAL_BIT_COUNT. + This measurement is monotonically increased, as the frontend gets more bit count measurements. + The frontend may reset it when a channel/transponder is tuned. + Possible scales for this metric are: + + FE_SCALE_NOT_AVAILABLE - it failed to measure it, or the measurement was not complete yet. + FE_SCALE_COUNTER - Number of error bits counted before the inner coding. + +
+
+ <constant>DTV_STAT_PRE_TOTAL_BIT_COUNT</constant> + Measures the amount of bits received before the inner code block, during the same period as + DTV_STAT_PRE_ERROR_BIT_COUNT measurement was taken. + It should be noticed that this measurement can be smaller than the total amount of bits on the transport stream, + as the frontend may need to manually restart the measurement, loosing some data between each measurement interval. + This measurement is monotonically increased, as the frontend gets more bit count measurements. + The frontend may reset it when a channel/transponder is tuned. + Possible scales for this metric are: + + FE_SCALE_NOT_AVAILABLE - it failed to measure it, or the measurement was not complete yet. + FE_SCALE_COUNTER - Number of bits counted while measuring + DTV_STAT_PRE_ERROR_BIT_COUNT. + +
+
+ <constant>DTV_STAT_POST_ERROR_BIT_COUNT</constant> + Measures the number of bit errors after the forward error correction (FEC) done by inner code block (after Viterbi, LDPC or other inner code). + This measure is taken during the same interval as DTV_STAT_POST_TOTAL_BIT_COUNT. + In order to get the BER (Bit Error Rate) measurement, it should be divided by + DTV_STAT_POST_TOTAL_BIT_COUNT. + This measurement is monotonically increased, as the frontend gets more bit count measurements. + The frontend may reset it when a channel/transponder is tuned. + Possible scales for this metric are: + + FE_SCALE_NOT_AVAILABLE - it failed to measure it, or the measurement was not complete yet. + FE_SCALE_COUNTER - Number of error bits counted after the inner coding. + +
+
+ <constant>DTV_STAT_POST_TOTAL_BIT_COUNT</constant> + Measures the amount of bits received after the inner coding, during the same period as + DTV_STAT_POST_ERROR_BIT_COUNT measurement was taken. + It should be noticed that this measurement can be smaller than the total amount of bits on the transport stream, + as the frontend may need to manually restart the measurement, loosing some data between each measurement interval. + This measurement is monotonically increased, as the frontend gets more bit count measurements. + The frontend may reset it when a channel/transponder is tuned. + Possible scales for this metric are: + + FE_SCALE_NOT_AVAILABLE - it failed to measure it, or the measurement was not complete yet. + FE_SCALE_COUNTER - Number of bits counted while measuring + DTV_STAT_POST_ERROR_BIT_COUNT. + +
+
+ <constant>DTV_STAT_ERROR_BLOCK_COUNT</constant> + Measures the number of block errors after the outer forward error correction coding (after Reed-Solomon or other outer code). + This measurement is monotonically increased, as the frontend gets more bit count measurements. + The frontend may reset it when a channel/transponder is tuned. + Possible scales for this metric are: + + FE_SCALE_NOT_AVAILABLE - it failed to measure it, or the measurement was not complete yet. + FE_SCALE_COUNTER - Number of error blocks counted after the outer coding. + +
+
+ <constant>DTV-STAT_TOTAL_BLOCK_COUNT</constant> + Measures the total number of blocks received during the same period as + DTV_STAT_ERROR_BLOCK_COUNT measurement was taken. + It can be used to calculate the PER indicator, by dividing + DTV_STAT_ERROR_BLOCK_COUNT + by DTV-STAT-TOTAL-BLOCK-COUNT. + Possible scales for this metric are: + + FE_SCALE_NOT_AVAILABLE - it failed to measure it, or the measurement was not complete yet. + FE_SCALE_COUNTER - Number of blocks counted while measuring + DTV_STAT_ERROR_BLOCK_COUNT. + +
+
+
Properties used on terrestrial delivery systems
@@ -871,6 +1039,7 @@ enum fe_interleaving { DTV_HIERARCHY DTV_LNA + In addition, the DTV QoS statistics are also valid.
DVB-T2 delivery system @@ -895,6 +1064,7 @@ enum fe_interleaving { DTV_STREAM_ID DTV_LNA + In addition, the DTV QoS statistics are also valid.
ISDB-T delivery system @@ -948,6 +1118,7 @@ enum fe_interleaving { DTV_ISDBT_LAYERC_SEGMENT_COUNT DTV_ISDBT_LAYERC_TIME_INTERLEAVING + In addition, the DTV QoS statistics are also valid.
ATSC delivery system @@ -961,6 +1132,7 @@ enum fe_interleaving { DTV_MODULATION DTV_BANDWIDTH_HZ + In addition, the DTV QoS statistics are also valid.
ATSC-MH delivery system @@ -988,6 +1160,7 @@ enum fe_interleaving { DTV_ATSCMH_SCCC_CODE_MODE_C DTV_ATSCMH_SCCC_CODE_MODE_D + In addition, the DTV QoS statistics are also valid.
DTMB delivery system @@ -1007,6 +1180,7 @@ enum fe_interleaving { DTV_INTERLEAVING DTV_LNA + In addition, the DTV QoS statistics are also valid.
@@ -1028,6 +1202,7 @@ enum fe_interleaving { DTV_INNER_FEC DTV_LNA + In addition, the DTV QoS statistics are also valid.
DVB-C Annex B delivery system @@ -1043,6 +1218,7 @@ enum fe_interleaving { DTV_INVERSION DTV_LNA + In addition, the DTV QoS statistics are also valid.
@@ -1062,6 +1238,7 @@ enum fe_interleaving { DTV_VOLTAGE DTV_TONE + In addition, the DTV QoS statistics are also valid. Future implementations might add those two missing parameters: DTV_DISEQC_MASTER @@ -1077,6 +1254,7 @@ enum fe_interleaving { DTV_ROLLOFF DTV_STREAM_ID + In addition, the DTV QoS statistics are also valid.
Turbo code delivery system diff --git a/Documentation/DocBook/media/dvb/frontend.xml b/Documentation/DocBook/media/dvb/frontend.xml index 426c2526a454..df39ba395df0 100644 --- a/Documentation/DocBook/media/dvb/frontend.xml +++ b/Documentation/DocBook/media/dvb/frontend.xml @@ -230,7 +230,7 @@ typedef enum fe_status { The frontend has found a DVB signal FE_HAS_VITERBI -The frontend FEC code is stable +The frontend FEC inner coding (Viterbi, LDPC or other inner code) is stable FE_HAS_SYNC Syncronization bytes was found diff --git a/include/uapi/linux/dvb/frontend.h b/include/uapi/linux/dvb/frontend.h index c12d452cb40d..c56d77c496a5 100644 --- a/include/uapi/linux/dvb/frontend.h +++ b/include/uapi/linux/dvb/frontend.h @@ -365,7 +365,17 @@ struct dvb_frontend_event { #define DTV_INTERLEAVING 60 #define DTV_LNA 61 -#define DTV_MAX_COMMAND DTV_LNA +/* Quality parameters */ +#define DTV_STAT_SIGNAL_STRENGTH 62 +#define DTV_STAT_CNR 63 +#define DTV_STAT_PRE_ERROR_BIT_COUNT 64 +#define DTV_STAT_PRE_TOTAL_BIT_COUNT 65 +#define DTV_STAT_POST_ERROR_BIT_COUNT 66 +#define DTV_STAT_POST_TOTAL_BIT_COUNT 67 +#define DTV_STAT_ERROR_BLOCK_COUNT 68 +#define DTV_STAT_TOTAL_BLOCK_COUNT 69 + +#define DTV_MAX_COMMAND DTV_STAT_TOTAL_BLOCK_COUNT typedef enum fe_pilot { PILOT_ON, @@ -452,11 +462,78 @@ struct dtv_cmds_h { __u32 reserved:30; /* Align */ }; +/** + * Scale types for the quality parameters. + * @FE_SCALE_NOT_AVAILABLE: That QoS measure is not available. That + * could indicate a temporary or a permanent + * condition. + * @FE_SCALE_DECIBEL: The scale is measured in 0.0001 dB steps, typically + * used on signal measures. + * @FE_SCALE_RELATIVE: The scale is a relative percentual measure, + * ranging from 0 (0%) to 0xffff (100%). + * @FE_SCALE_COUNTER: The scale counts the occurrence of an event, like + * bit error, block error, lapsed time. + */ +enum fecap_scale_params { + FE_SCALE_NOT_AVAILABLE = 0, + FE_SCALE_DECIBEL, + FE_SCALE_RELATIVE, + FE_SCALE_COUNTER +}; + +/** + * struct dtv_stats - Used for reading a DTV status property + * + * @value: value of the measure. Should range from 0 to 0xffff; + * @scale: Filled with enum fecap_scale_params - the scale + * in usage for that parameter + * + * For most delivery systems, this will return a single value for each + * parameter. + * It should be noticed, however, that new OFDM delivery systems like + * ISDB can use different modulation types for each group of carriers. + * On such standards, up to 8 groups of statistics can be provided, one + * for each carrier group (called "layer" on ISDB). + * In order to be consistent with other delivery systems, the first + * value refers to the entire set of carriers ("global"). + * dtv_status:scale should use the value FE_SCALE_NOT_AVAILABLE when + * the value for the entire group of carriers or from one specific layer + * is not provided by the hardware. + * st.len should be filled with the latest filled status + 1. + * + * In other words, for ISDB, those values should be filled like: + * u.st.stat.svalue[0] = global statistics; + * u.st.stat.scale[0] = FE_SCALE_DECIBELS; + * u.st.stat.value[1] = layer A statistics; + * u.st.stat.scale[1] = FE_SCALE_NOT_AVAILABLE (if not available); + * u.st.stat.svalue[2] = layer B statistics; + * u.st.stat.scale[2] = FE_SCALE_DECIBELS; + * u.st.stat.svalue[3] = layer C statistics; + * u.st.stat.scale[3] = FE_SCALE_DECIBELS; + * u.st.len = 4; + */ +struct dtv_stats { + __u8 scale; /* enum fecap_scale_params type */ + union { + __u64 uvalue; /* for counters and relative scales */ + __s64 svalue; /* for 0.0001 dB measures */ + }; +} __attribute__ ((packed)); + + +#define MAX_DTV_STATS 4 + +struct dtv_fe_stats { + __u8 len; + struct dtv_stats stat[MAX_DTV_STATS]; +} __attribute__ ((packed)); + struct dtv_property { __u32 cmd; __u32 reserved[3]; union { __u32 data; + struct dtv_fe_stats st; struct { __u8 data[32]; __u32 len; diff --git a/include/uapi/linux/dvb/version.h b/include/uapi/linux/dvb/version.h index 827cce7e33e3..e53e2ad4444f 100644 --- a/include/uapi/linux/dvb/version.h +++ b/include/uapi/linux/dvb/version.h @@ -24,6 +24,6 @@ #define _DVBVERSION_H_ #define DVB_API_VERSION 5 -#define DVB_API_VERSION_MINOR 9 +#define DVB_API_VERSION_MINOR 10 #endif /*_DVBVERSION_H_*/ -- cgit v1.2.3 From 0b05b18381eea98c9c9ada95629bf659a88c9374 Mon Sep 17 00:00:00 2001 From: "Anand V. Avati" Date: Sun, 19 Aug 2012 08:53:23 -0400 Subject: fuse: implement NFS-like readdirplus support This patch implements readdirplus support in FUSE, similar to NFS. The payload returned in the readdirplus call contains 'fuse_entry_out' structure thereby providing all the necessary inputs for 'faking' a lookup() operation on the spot. If the dentry and inode already existed (for e.g. in a re-run of ls -l) then just the inode attributes timeout and dentry timeout are refreshed. With a simple client->network->server implementation of a FUSE based filesystem, the following performance observations were made: Test: Performing a filesystem crawl over 20,000 files with sh# time ls -lR /mnt Without readdirplus: Run 1: 18.1s Run 2: 16.0s Run 3: 16.2s With readdirplus: Run 1: 4.1s Run 2: 3.8s Run 3: 3.8s The performance improvement is significant as it avoided 20,000 upcalls calls (lookup). Cache consistency is no worse than what already is. Signed-off-by: Anand V. Avati Signed-off-by: Miklos Szeredi --- fs/fuse/dev.c | 19 ++++++ fs/fuse/dir.c | 160 ++++++++++++++++++++++++++++++++++++++++++++-- fs/fuse/fuse_i.h | 6 ++ fs/fuse/inode.c | 5 +- include/uapi/linux/fuse.h | 12 ++++ 5 files changed, 197 insertions(+), 5 deletions(-) (limited to 'include/uapi/linux') diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index e83351aa5bad..05c3eec298f2 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -491,6 +491,25 @@ void fuse_request_send_background_locked(struct fuse_conn *fc, fuse_request_send_nowait_locked(fc, req); } +void fuse_force_forget(struct file *file, u64 nodeid) +{ + struct inode *inode = file->f_path.dentry->d_inode; + struct fuse_conn *fc = get_fuse_conn(inode); + struct fuse_req *req; + struct fuse_forget_in inarg; + + memset(&inarg, 0, sizeof(inarg)); + inarg.nlookup = 1; + req = fuse_get_req_nofail(fc, file); + req->in.h.opcode = FUSE_FORGET; + req->in.h.nodeid = nodeid; + req->in.numargs = 1; + req->in.args[0].size = sizeof(inarg); + req->in.args[0].value = &inarg; + req->isreply = 0; + fuse_request_send_nowait(fc, req); +} + /* * Lock the request. Up to the next unlock_request() there mustn't be * anything that could cause a page-fault. If the request was already diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index b7c09f9eb40c..dcc1e522c7d4 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -1155,6 +1155,143 @@ static int parse_dirfile(char *buf, size_t nbytes, struct file *file, return 0; } +static int fuse_direntplus_link(struct file *file, + struct fuse_direntplus *direntplus, + u64 attr_version) +{ + int err; + struct fuse_entry_out *o = &direntplus->entry_out; + struct fuse_dirent *dirent = &direntplus->dirent; + struct dentry *parent = file->f_path.dentry; + struct qstr name = QSTR_INIT(dirent->name, dirent->namelen); + struct dentry *dentry; + struct dentry *alias; + struct inode *dir = parent->d_inode; + struct fuse_conn *fc; + struct inode *inode; + + if (!o->nodeid) { + /* + * Unlike in the case of fuse_lookup, zero nodeid does not mean + * ENOENT. Instead, it only means the userspace filesystem did + * not want to return attributes/handle for this entry. + * + * So do nothing. + */ + return 0; + } + + if (name.name[0] == '.') { + /* + * We could potentially refresh the attributes of the directory + * and its parent? + */ + if (name.len == 1) + return 0; + if (name.name[1] == '.' && name.len == 2) + return 0; + } + fc = get_fuse_conn(dir); + + name.hash = full_name_hash(name.name, name.len); + dentry = d_lookup(parent, &name); + if (dentry && dentry->d_inode) { + inode = dentry->d_inode; + if (get_node_id(inode) == o->nodeid) { + struct fuse_inode *fi; + fi = get_fuse_inode(inode); + spin_lock(&fc->lock); + fi->nlookup++; + spin_unlock(&fc->lock); + + /* + * The other branch to 'found' comes via fuse_iget() + * which bumps nlookup inside + */ + goto found; + } + err = d_invalidate(dentry); + if (err) + goto out; + dput(dentry); + dentry = NULL; + } + + dentry = d_alloc(parent, &name); + err = -ENOMEM; + if (!dentry) + goto out; + + inode = fuse_iget(dir->i_sb, o->nodeid, o->generation, + &o->attr, entry_attr_timeout(o), attr_version); + if (!inode) + goto out; + + alias = d_materialise_unique(dentry, inode); + err = PTR_ERR(alias); + if (IS_ERR(alias)) + goto out; + if (alias) { + dput(dentry); + dentry = alias; + } + +found: + fuse_change_attributes(inode, &o->attr, entry_attr_timeout(o), + attr_version); + + fuse_change_entry_timeout(dentry, o); + + err = 0; +out: + if (dentry) + dput(dentry); + return err; +} + +static int parse_dirplusfile(char *buf, size_t nbytes, struct file *file, + void *dstbuf, filldir_t filldir, u64 attr_version) +{ + struct fuse_direntplus *direntplus; + struct fuse_dirent *dirent; + size_t reclen; + int over = 0; + int ret; + + while (nbytes >= FUSE_NAME_OFFSET_DIRENTPLUS) { + direntplus = (struct fuse_direntplus *) buf; + dirent = &direntplus->dirent; + reclen = FUSE_DIRENTPLUS_SIZE(direntplus); + + if (!dirent->namelen || dirent->namelen > FUSE_NAME_MAX) + return -EIO; + if (reclen > nbytes) + break; + + if (!over) { + /* We fill entries into dstbuf only as much as + it can hold. But we still continue iterating + over remaining entries to link them. If not, + we need to send a FORGET for each of those + which we did not link. + */ + over = filldir(dstbuf, dirent->name, dirent->namelen, + file->f_pos, dirent->ino, + dirent->type); + file->f_pos = dirent->off; + } + + buf += reclen; + nbytes -= reclen; + + ret = fuse_direntplus_link(file, direntplus, attr_version); + if (ret) + fuse_force_forget(file, direntplus->entry_out.nodeid); + } + + return 0; +} + static int fuse_readdir(struct file *file, void *dstbuf, filldir_t filldir) { int err; @@ -1163,6 +1300,7 @@ static int fuse_readdir(struct file *file, void *dstbuf, filldir_t filldir) struct inode *inode = file->f_path.dentry->d_inode; struct fuse_conn *fc = get_fuse_conn(inode); struct fuse_req *req; + u64 attr_version = 0; if (is_bad_inode(inode)) return -EIO; @@ -1179,14 +1317,28 @@ static int fuse_readdir(struct file *file, void *dstbuf, filldir_t filldir) req->out.argpages = 1; req->num_pages = 1; req->pages[0] = page; - fuse_read_fill(req, file, file->f_pos, PAGE_SIZE, FUSE_READDIR); + if (fc->do_readdirplus) { + attr_version = fuse_get_attr_version(fc); + fuse_read_fill(req, file, file->f_pos, PAGE_SIZE, + FUSE_READDIRPLUS); + } else { + fuse_read_fill(req, file, file->f_pos, PAGE_SIZE, + FUSE_READDIR); + } fuse_request_send(fc, req); nbytes = req->out.args[0].size; err = req->out.h.error; fuse_put_request(fc, req); - if (!err) - err = parse_dirfile(page_address(page), nbytes, file, dstbuf, - filldir); + if (!err) { + if (fc->do_readdirplus) { + err = parse_dirplusfile(page_address(page), nbytes, + file, dstbuf, filldir, + attr_version); + } else { + err = parse_dirfile(page_address(page), nbytes, file, + dstbuf, filldir); + } + } __free_page(page); fuse_invalidate_attr(inode); /* atime changed */ diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index e105a53fc72d..5c5055306d3c 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -487,6 +487,9 @@ struct fuse_conn { /** Use enhanced/automatic page cache invalidation. */ unsigned auto_inval_data:1; + /** Does the filesystem support readdir-plus? */ + unsigned do_readdirplus:1; + /** The number of requests waiting for completion */ atomic_t num_waiting; @@ -578,6 +581,9 @@ void fuse_queue_forget(struct fuse_conn *fc, struct fuse_forget_link *forget, struct fuse_forget_link *fuse_alloc_forget(void); +/* Used by READDIRPLUS */ +void fuse_force_forget(struct file *file, u64 nodeid); + /** * Initialize READ or READDIR request */ diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 73ca6b72beaf..6f7d5746bf52 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -863,6 +863,8 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req) fc->dont_mask = 1; if (arg->flags & FUSE_AUTO_INVAL_DATA) fc->auto_inval_data = 1; + if (arg->flags & FUSE_DO_READDIRPLUS) + fc->do_readdirplus = 1; } else { ra_pages = fc->max_read / PAGE_CACHE_SIZE; fc->no_lock = 1; @@ -889,7 +891,8 @@ static void fuse_send_init(struct fuse_conn *fc, struct fuse_req *req) arg->flags |= FUSE_ASYNC_READ | FUSE_POSIX_LOCKS | FUSE_ATOMIC_O_TRUNC | FUSE_EXPORT_SUPPORT | FUSE_BIG_WRITES | FUSE_DONT_MASK | FUSE_SPLICE_WRITE | FUSE_SPLICE_MOVE | FUSE_SPLICE_READ | - FUSE_FLOCK_LOCKS | FUSE_IOCTL_DIR | FUSE_AUTO_INVAL_DATA; + FUSE_FLOCK_LOCKS | FUSE_IOCTL_DIR | FUSE_AUTO_INVAL_DATA | + FUSE_DO_READDIRPLUS; req->in.h.opcode = FUSE_INIT; req->in.numargs = 1; req->in.args[0].size = sizeof(*arg); diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h index d8c713e148e3..5dc1fea49ecd 100644 --- a/include/uapi/linux/fuse.h +++ b/include/uapi/linux/fuse.h @@ -193,6 +193,7 @@ struct fuse_file_lock { #define FUSE_FLOCK_LOCKS (1 << 10) #define FUSE_HAS_IOCTL_DIR (1 << 11) #define FUSE_AUTO_INVAL_DATA (1 << 12) +#define FUSE_DO_READDIRPLUS (1 << 13) /** * CUSE INIT request/reply flags @@ -299,6 +300,7 @@ enum fuse_opcode { FUSE_NOTIFY_REPLY = 41, FUSE_BATCH_FORGET = 42, FUSE_FALLOCATE = 43, + FUSE_READDIRPLUS = 44, /* CUSE specific operations */ CUSE_INIT = 4096, @@ -630,6 +632,16 @@ struct fuse_dirent { #define FUSE_DIRENT_SIZE(d) \ FUSE_DIRENT_ALIGN(FUSE_NAME_OFFSET + (d)->namelen) +struct fuse_direntplus { + struct fuse_entry_out entry_out; + struct fuse_dirent dirent; +}; + +#define FUSE_NAME_OFFSET_DIRENTPLUS \ + offsetof(struct fuse_direntplus, dirent.name) +#define FUSE_DIRENTPLUS_SIZE(d) \ + FUSE_DIRENT_ALIGN(FUSE_NAME_OFFSET_DIRENTPLUS + (d)->dirent.namelen) + struct fuse_notify_inval_inode_out { __u64 ino; __s64 off; -- cgit v1.2.3 From 54a3ac0c9e5b7213daa358ce74d154352657353a Mon Sep 17 00:00:00 2001 From: Lan Tianyu Date: Thu, 24 Jan 2013 10:31:28 +0800 Subject: usb: Using correct way to clear usb3.0 device's remote wakeup feature. Usb3.0 device defines function remote wakeup which is only for interface recipient rather than device recipient. This is different with usb2.0 device's remote wakeup feature which is defined for device recipient. According usb3.0 spec 9.4.5, the function remote wakeup can be modified by the SetFeature() requests using the FUNCTION_SUSPEND feature selector. This patch is to use correct way to disable usb3.0 device's function remote wakeup after suspend error and resuming. This should be backported to kernels as old as 3.4, that contain the commit 623bef9e03a60adc623b09673297ca7a1cdfb367 "USB/xhci: Enable remote wakeup for USB3 devices." Signed-off-by: Lan Tianyu Signed-off-by: Sarah Sharp Cc: stable@vger.kernel.org --- drivers/usb/core/hub.c | 70 ++++++++++++++++++++++++++++++++------------ include/uapi/linux/usb/ch9.h | 6 ++++ 2 files changed, 58 insertions(+), 18 deletions(-) (limited to 'include/uapi/linux') diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index 957ed2c41482..cbf7168e3ce7 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -2838,6 +2838,23 @@ void usb_enable_ltm(struct usb_device *udev) EXPORT_SYMBOL_GPL(usb_enable_ltm); #ifdef CONFIG_USB_SUSPEND +/* + * usb_disable_function_remotewakeup - disable usb3.0 + * device's function remote wakeup + * @udev: target device + * + * Assume there's only one function on the USB 3.0 + * device and disable remote wake for the first + * interface. FIXME if the interface association + * descriptor shows there's more than one function. + */ +static int usb_disable_function_remotewakeup(struct usb_device *udev) +{ + return usb_control_msg(udev, usb_sndctrlpipe(udev, 0), + USB_REQ_CLEAR_FEATURE, USB_RECIP_INTERFACE, + USB_INTRF_FUNC_SUSPEND, 0, NULL, 0, + USB_CTRL_SET_TIMEOUT); +} /* * usb_port_suspend - suspend a usb device's upstream port @@ -2955,12 +2972,19 @@ int usb_port_suspend(struct usb_device *udev, pm_message_t msg) dev_dbg(hub->intfdev, "can't suspend port %d, status %d\n", port1, status); /* paranoia: "should not happen" */ - if (udev->do_remote_wakeup) - (void) usb_control_msg(udev, usb_sndctrlpipe(udev, 0), - USB_REQ_CLEAR_FEATURE, USB_RECIP_DEVICE, - USB_DEVICE_REMOTE_WAKEUP, 0, - NULL, 0, - USB_CTRL_SET_TIMEOUT); + if (udev->do_remote_wakeup) { + if (!hub_is_superspeed(hub->hdev)) { + (void) usb_control_msg(udev, + usb_sndctrlpipe(udev, 0), + USB_REQ_CLEAR_FEATURE, + USB_RECIP_DEVICE, + USB_DEVICE_REMOTE_WAKEUP, 0, + NULL, 0, + USB_CTRL_SET_TIMEOUT); + } else + (void) usb_disable_function_remotewakeup(udev); + + } /* Try to enable USB2 hardware LPM again */ if (udev->usb2_hw_lpm_capable == 1) @@ -3052,20 +3076,30 @@ static int finish_port_resume(struct usb_device *udev) * udev->reset_resume */ } else if (udev->actconfig && !udev->reset_resume) { - le16_to_cpus(&devstatus); - if (devstatus & (1 << USB_DEVICE_REMOTE_WAKEUP)) { - status = usb_control_msg(udev, - usb_sndctrlpipe(udev, 0), - USB_REQ_CLEAR_FEATURE, + if (!hub_is_superspeed(udev->parent)) { + le16_to_cpus(&devstatus); + if (devstatus & (1 << USB_DEVICE_REMOTE_WAKEUP)) + status = usb_control_msg(udev, + usb_sndctrlpipe(udev, 0), + USB_REQ_CLEAR_FEATURE, USB_RECIP_DEVICE, - USB_DEVICE_REMOTE_WAKEUP, 0, - NULL, 0, - USB_CTRL_SET_TIMEOUT); - if (status) - dev_dbg(&udev->dev, - "disable remote wakeup, status %d\n", - status); + USB_DEVICE_REMOTE_WAKEUP, 0, + NULL, 0, + USB_CTRL_SET_TIMEOUT); + } else { + status = usb_get_status(udev, USB_RECIP_INTERFACE, 0, + &devstatus); + le16_to_cpus(&devstatus); + if (!status && devstatus & (USB_INTRF_STAT_FUNC_RW_CAP + | USB_INTRF_STAT_FUNC_RW)) + status = + usb_disable_function_remotewakeup(udev); } + + if (status) + dev_dbg(&udev->dev, + "disable remote wakeup, status %d\n", + status); status = 0; } return status; diff --git a/include/uapi/linux/usb/ch9.h b/include/uapi/linux/usb/ch9.h index 50598472dc41..f738e25377ff 100644 --- a/include/uapi/linux/usb/ch9.h +++ b/include/uapi/linux/usb/ch9.h @@ -152,6 +152,12 @@ #define USB_INTRF_FUNC_SUSPEND_LP (1 << (8 + 0)) #define USB_INTRF_FUNC_SUSPEND_RW (1 << (8 + 1)) +/* + * Interface status, Figure 9-5 USB 3.0 spec + */ +#define USB_INTRF_STAT_FUNC_RW_CAP 1 +#define USB_INTRF_STAT_FUNC_RW 2 + #define USB_ENDPOINT_HALT 0 /* IN/OUT will STALL */ /* Bit array elements as returned by the USB_REQ_GET_STATUS request. */ -- cgit v1.2.3 From b878e7fb22ea48b0585bbbbef249f7efc6d42748 Mon Sep 17 00:00:00 2001 From: Vince Weaver Date: Tue, 8 Jan 2013 14:44:25 -0500 Subject: perf: Missing field in PERF_RECORD_SAMPLE documentation While trying to write a perf_event/mmap test for my perf_event test-suite I came across a missing field description in the PERF_RECORD_SAMPLE documentation in perf_event.h Signed-off-by: Vince Weaver Cc: Ingo Molnar Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/alpine.DEB.2.02.1301081439300.24507@vincent-weaver-1.um.maine.edu Signed-off-by: Arnaldo Carvalho de Melo --- include/uapi/linux/perf_event.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index 4f63c05d27c9..9fa9c622a7f4 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -579,7 +579,8 @@ enum perf_event_type { * { u32 size; * char data[size];}&& PERF_SAMPLE_RAW * - * { u64 from, to, flags } lbr[nr];} && PERF_SAMPLE_BRANCH_STACK + * { u64 nr; + * { u64 from, to, flags } lbr[nr];} && PERF_SAMPLE_BRANCH_STACK * * { u64 abi; # enum perf_sample_regs_abi * u64 regs[weight(mask)]; } && PERF_SAMPLE_REGS_USER -- cgit v1.2.3 From 77765eaf5cfb6b8dd98ec8b54b411d74ff6095f1 Mon Sep 17 00:00:00 2001 From: Vasanthakumar Thiagarajan Date: Fri, 18 Jan 2013 11:18:45 +0530 Subject: cfg80211/nl80211: add API for MAC address ACLs Add API to enable drivers to implement MAC address based access control in AP/P2P GO mode. Capable drivers advertise this capability by setting the maximum number of MAC addresses in such a list in wiphy->max_acl_mac_addrs. An initial ACL may be given to the NL80211_CMD_START_AP command and/or changed later with NL80211_CMD_SET_MAC_ACL. Black- and whitelists are supported, but not simultaneously. Signed-off-by: Vasanthakumar Thiagarajan [rewrite commit log, many cleanups] Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 34 +++++++++++++ include/uapi/linux/nl80211.h | 51 ++++++++++++++++++- net/wireless/core.c | 5 ++ net/wireless/nl80211.c | 116 +++++++++++++++++++++++++++++++++++++++++++ net/wireless/rdev-ops.h | 12 +++++ net/wireless/trace.h | 18 +++++++ 6 files changed, 234 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 183033789e69..36e076e374d2 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -531,6 +531,22 @@ struct mac_address { u8 addr[ETH_ALEN]; }; +/** + * struct cfg80211_acl_data - Access control list data + * + * @acl_policy: ACL policy to be applied on the station's + entry specified by mac_addr + * @n_acl_entries: Number of MAC address entries passed + * @mac_addrs: List of MAC addresses of stations to be used for ACL + */ +struct cfg80211_acl_data { + enum nl80211_acl_policy acl_policy; + int n_acl_entries; + + /* Keep it last */ + struct mac_address mac_addrs[]; +}; + /** * struct cfg80211_ap_settings - AP configuration * @@ -550,6 +566,8 @@ struct mac_address { * @inactivity_timeout: time in seconds to determine station's inactivity. * @p2p_ctwindow: P2P CT Window * @p2p_opp_ps: P2P opportunistic PS + * @acl: ACL configuration used by the drivers which has support for + * MAC address based access control */ struct cfg80211_ap_settings { struct cfg80211_chan_def chandef; @@ -566,6 +584,7 @@ struct cfg80211_ap_settings { int inactivity_timeout; u8 p2p_ctwindow; bool p2p_opp_ps; + const struct cfg80211_acl_data *acl; }; /** @@ -1800,6 +1819,13 @@ struct cfg80211_gtk_rekey_data { * * @start_p2p_device: Start the given P2P device. * @stop_p2p_device: Stop the given P2P device. + * + * @set_mac_acl: Sets MAC address control list in AP and P2P GO mode. + * Parameters include ACL policy, an array of MAC address of stations + * and the number of MAC addresses. If there is already a list in driver + * this new list replaces the existing one. Driver has to clear its ACL + * when number of MAC addresses entries is passed as 0. Drivers which + * advertise the support for MAC based ACL have to implement this callback. */ struct cfg80211_ops { int (*suspend)(struct wiphy *wiphy, struct cfg80211_wowlan *wow); @@ -2020,6 +2046,9 @@ struct cfg80211_ops { struct wireless_dev *wdev); void (*stop_p2p_device)(struct wiphy *wiphy, struct wireless_dev *wdev); + + int (*set_mac_acl)(struct wiphy *wiphy, struct net_device *dev, + const struct cfg80211_acl_data *params); }; /* @@ -2325,6 +2354,9 @@ struct wiphy_wowlan_support { * @ap_sme_capa: AP SME capabilities, flags from &enum nl80211_ap_sme_features. * @ht_capa_mod_mask: Specify what ht_cap values can be over-ridden. * If null, then none can be over-ridden. + * + * @max_acl_mac_addrs: Maximum number of MAC addresses that the device + * supports for ACL. */ struct wiphy { /* assign these fields before you register the wiphy */ @@ -2346,6 +2378,8 @@ struct wiphy { /* Supported interface modes, OR together BIT(NL80211_IFTYPE_...) */ u16 interface_modes; + u16 max_acl_mac_addrs; + u32 flags, features; u32 ap_sme_capa; diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index e6eeb4ba5dc5..5b7dbc1ea966 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -170,7 +170,8 @@ * %NL80211_ATTR_HIDDEN_SSID, %NL80211_ATTR_CIPHERS_PAIRWISE, * %NL80211_ATTR_CIPHER_GROUP, %NL80211_ATTR_WPA_VERSIONS, * %NL80211_ATTR_AKM_SUITES, %NL80211_ATTR_PRIVACY, - * %NL80211_ATTR_AUTH_TYPE and %NL80211_ATTR_INACTIVITY_TIMEOUT. + * %NL80211_ATTR_AUTH_TYPE, %NL80211_ATTR_INACTIVITY_TIMEOUT, + * %NL80211_ATTR_ACL_POLICY and %NL80211_ATTR_MAC_ADDRS. * The channel to use can be set on the interface or be given using the * %NL80211_ATTR_WIPHY_FREQ and the attributes determining channel width. * @NL80211_CMD_NEW_BEACON: old alias for %NL80211_CMD_START_AP @@ -586,6 +587,16 @@ * @NL80211_CMD_SET_MCAST_RATE: Change the rate used to send multicast frames * for IBSS or MESH vif. * + * @NL80211_CMD_SET_MAC_ACL: sets ACL for MAC address based access control. + * This is to be used with the drivers advertising the support of MAC + * address based access control. List of MAC addresses is passed in + * %NL80211_ATTR_MAC_ADDRS and ACL policy is passed in + * %NL80211_ATTR_ACL_POLICY. Driver will enable ACL with this list, if it + * is not already done. The new list will replace any existing list. Driver + * will clear its ACL when the list of MAC addresses passed is empty. This + * command is used in AP/P2P GO mode. Driver has to make sure to clear its + * ACL list during %NL80211_CMD_STOP_AP. + * * @NL80211_CMD_MAX: highest used command number * @__NL80211_CMD_AFTER_LAST: internal use */ @@ -736,6 +747,8 @@ enum nl80211_commands { NL80211_CMD_SET_MCAST_RATE, + NL80211_CMD_SET_MAC_ACL, + /* add new commands above here */ /* used to define NL80211_CMD_MAX below */ @@ -1313,6 +1326,16 @@ enum nl80211_commands { * @NL80211_ATTR_LOCAL_MESH_POWER_MODE: local mesh STA link-specific power mode * defined in &enum nl80211_mesh_power_mode. * + * @NL80211_ATTR_ACL_POLICY: ACL policy, see &enum nl80211_acl_policy, + * carried in a u32 attribute + * + * @NL80211_ATTR_MAC_ADDRS: Array of nested MAC addresses, used for + * MAC ACL. + * + * @NL80211_ATTR_MAC_ACL_MAX: u32 attribute to advertise the maximum + * number of MAC addresses that a device can support for MAC + * ACL. + * * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use */ @@ -1585,6 +1608,12 @@ enum nl80211_attrs { NL80211_ATTR_LOCAL_MESH_POWER_MODE, + NL80211_ATTR_ACL_POLICY, + + NL80211_ATTR_MAC_ADDRS, + + NL80211_ATTR_MAC_ACL_MAX, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, @@ -3248,7 +3277,7 @@ enum nl80211_probe_resp_offload_support_attr { * enum nl80211_connect_failed_reason - connection request failed reasons * @NL80211_CONN_FAIL_MAX_CLIENTS: Maximum number of clients that can be * handled by the AP is reached. - * @NL80211_CONN_FAIL_BLOCKED_CLIENT: Client's MAC is in the AP's blocklist. + * @NL80211_CONN_FAIL_BLOCKED_CLIENT: Connection request is rejected due to ACL. */ enum nl80211_connect_failed_reason { NL80211_CONN_FAIL_MAX_CLIENTS, @@ -3276,4 +3305,22 @@ enum nl80211_scan_flags { NL80211_SCAN_FLAG_AP = 1<<2, }; +/** + * enum nl80211_acl_policy - access control policy + * + * Access control policy is applied on a MAC list set by + * %NL80211_CMD_START_AP and %NL80211_CMD_SET_MAC_ACL, to + * be used with %NL80211_ATTR_ACL_POLICY. + * + * @NL80211_ACL_POLICY_ACCEPT_UNLESS_LISTED: Deny stations which are + * listed in ACL, i.e. allow all the stations which are not listed + * in ACL to authenticate. + * @NL80211_ACL_POLICY_DENY_UNLESS_LISTED: Allow the stations which are listed + * in ACL, i.e. deny all the stations which are not listed in ACL. + */ +enum nl80211_acl_policy { + NL80211_ACL_POLICY_ACCEPT_UNLESS_LISTED, + NL80211_ACL_POLICY_DENY_UNLESS_LISTED, +}; + #endif /* __LINUX_NL80211_H */ diff --git a/net/wireless/core.c b/net/wireless/core.c index 0e702cdc6043..ce827242f390 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -478,6 +478,11 @@ int wiphy_register(struct wiphy *wiphy) ETH_ALEN))) return -EINVAL; + if (WARN_ON(wiphy->max_acl_mac_addrs && + (!(wiphy->flags & WIPHY_FLAG_HAVE_AP_SME) || + !rdev->ops->set_mac_acl))) + return -EINVAL; + if (wiphy->addresses) memcpy(wiphy->perm_addr, wiphy->addresses[0].addr, ETH_ALEN); diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 33de80364c5c..b5978ab4ad7a 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -365,6 +365,8 @@ static const struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] = { [NL80211_ATTR_SCAN_FLAGS] = { .type = NLA_U32 }, [NL80211_ATTR_P2P_CTWINDOW] = { .type = NLA_U8 }, [NL80211_ATTR_P2P_OPPPS] = { .type = NLA_U8 }, + [NL80211_ATTR_ACL_POLICY] = {. type = NLA_U32 }, + [NL80211_ATTR_MAC_ADDRS] = { .type = NLA_NESTED }, }; /* policy for the key attributes */ @@ -1268,6 +1270,12 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 portid, u32 seq, int flag dev->wiphy.ht_capa_mod_mask)) goto nla_put_failure; + if (dev->wiphy.flags & WIPHY_FLAG_HAVE_AP_SME && + dev->wiphy.max_acl_mac_addrs && + nla_put_u32(msg, NL80211_ATTR_MAC_ACL_MAX, + dev->wiphy.max_acl_mac_addrs)) + goto nla_put_failure; + return genlmsg_end(msg, hdr); nla_put_failure: @@ -2491,6 +2499,97 @@ static int nl80211_del_key(struct sk_buff *skb, struct genl_info *info) return err; } +/* This function returns an error or the number of nested attributes */ +static int validate_acl_mac_addrs(struct nlattr *nl_attr) +{ + struct nlattr *attr; + int n_entries = 0, tmp; + + nla_for_each_nested(attr, nl_attr, tmp) { + if (nla_len(attr) != ETH_ALEN) + return -EINVAL; + + n_entries++; + } + + return n_entries; +} + +/* + * This function parses ACL information and allocates memory for ACL data. + * On successful return, the calling function is responsible to free the + * ACL buffer returned by this function. + */ +static struct cfg80211_acl_data *parse_acl_data(struct wiphy *wiphy, + struct genl_info *info) +{ + enum nl80211_acl_policy acl_policy; + struct nlattr *attr; + struct cfg80211_acl_data *acl; + int i = 0, n_entries, tmp; + + if (!wiphy->max_acl_mac_addrs) + return ERR_PTR(-EOPNOTSUPP); + + if (!info->attrs[NL80211_ATTR_ACL_POLICY]) + return ERR_PTR(-EINVAL); + + acl_policy = nla_get_u32(info->attrs[NL80211_ATTR_ACL_POLICY]); + if (acl_policy != NL80211_ACL_POLICY_ACCEPT_UNLESS_LISTED && + acl_policy != NL80211_ACL_POLICY_DENY_UNLESS_LISTED) + return ERR_PTR(-EINVAL); + + if (!info->attrs[NL80211_ATTR_MAC_ADDRS]) + return ERR_PTR(-EINVAL); + + n_entries = validate_acl_mac_addrs(info->attrs[NL80211_ATTR_MAC_ADDRS]); + if (n_entries < 0) + return ERR_PTR(n_entries); + + if (n_entries > wiphy->max_acl_mac_addrs) + return ERR_PTR(-ENOTSUPP); + + acl = kzalloc(sizeof(*acl) + (sizeof(struct mac_address) * n_entries), + GFP_KERNEL); + if (!acl) + return ERR_PTR(-ENOMEM); + + nla_for_each_nested(attr, info->attrs[NL80211_ATTR_MAC_ADDRS], tmp) { + memcpy(acl->mac_addrs[i].addr, nla_data(attr), ETH_ALEN); + i++; + } + + acl->n_acl_entries = n_entries; + acl->acl_policy = acl_policy; + + return acl; +} + +static int nl80211_set_mac_acl(struct sk_buff *skb, struct genl_info *info) +{ + struct cfg80211_registered_device *rdev = info->user_ptr[0]; + struct net_device *dev = info->user_ptr[1]; + struct cfg80211_acl_data *acl; + int err; + + if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP && + dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_GO) + return -EOPNOTSUPP; + + if (!dev->ieee80211_ptr->beacon_interval) + return -EINVAL; + + acl = parse_acl_data(&rdev->wiphy, info); + if (IS_ERR(acl)) + return PTR_ERR(acl); + + err = rdev_set_mac_acl(rdev, dev, acl); + + kfree(acl); + + return err; +} + static int nl80211_parse_beacon(struct genl_info *info, struct cfg80211_beacon_data *bcn) { @@ -2734,6 +2833,12 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info) if (err) return err; + if (info->attrs[NL80211_ATTR_ACL_POLICY]) { + params.acl = parse_acl_data(&rdev->wiphy, info); + if (IS_ERR(params.acl)) + return PTR_ERR(params.acl); + } + err = rdev_start_ap(rdev, dev, ¶ms); if (!err) { wdev->preset_chandef = params.chandef; @@ -2742,6 +2847,9 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info) wdev->ssid_len = params.ssid_len; memcpy(wdev->ssid, params.ssid, wdev->ssid_len); } + + kfree(params.acl); + return err; } @@ -7876,6 +7984,14 @@ static struct genl_ops nl80211_ops[] = { .internal_flags = NL80211_FLAG_NEED_NETDEV | NL80211_FLAG_NEED_RTNL, }, + { + .cmd = NL80211_CMD_SET_MAC_ACL, + .doit = nl80211_set_mac_acl, + .policy = nl80211_policy, + .flags = GENL_ADMIN_PERM, + .internal_flags = NL80211_FLAG_NEED_NETDEV | + NL80211_FLAG_NEED_RTNL, + }, }; static struct genl_multicast_group nl80211_mlme_mcgrp = { diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h index 6c0c8191f837..422d38291d66 100644 --- a/net/wireless/rdev-ops.h +++ b/net/wireless/rdev-ops.h @@ -875,4 +875,16 @@ static inline void rdev_stop_p2p_device(struct cfg80211_registered_device *rdev, rdev->ops->stop_p2p_device(&rdev->wiphy, wdev); trace_rdev_return_void(&rdev->wiphy); } + +static inline int rdev_set_mac_acl(struct cfg80211_registered_device *rdev, + struct net_device *dev, + struct cfg80211_acl_data *params) +{ + int ret; + + trace_rdev_set_mac_acl(&rdev->wiphy, dev, params); + ret = rdev->ops->set_mac_acl(&rdev->wiphy, dev, params); + trace_rdev_return_int(&rdev->wiphy, ret); + return ret; +} #endif /* __CFG80211_RDEV_OPS */ diff --git a/net/wireless/trace.h b/net/wireless/trace.h index 2134576f426e..8bc553199686 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -1767,6 +1767,24 @@ DEFINE_EVENT(wiphy_wdev_evt, rdev_stop_p2p_device, TP_ARGS(wiphy, wdev) ); +TRACE_EVENT(rdev_set_mac_acl, + TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, + struct cfg80211_acl_data *params), + TP_ARGS(wiphy, netdev, params), + TP_STRUCT__entry( + WIPHY_ENTRY + NETDEV_ENTRY + __field(u32, acl_policy) + ), + TP_fast_assign( + WIPHY_ASSIGN; + WIPHY_ASSIGN; + __entry->acl_policy = params->acl_policy; + ), + TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", acl policy: %d", + WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->acl_policy) +); + /************************************************************* * cfg80211 exported functions traces * *************************************************************/ -- cgit v1.2.3 From d904d3edcbb26efc86ea3575bb4265559801a94b Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Thu, 17 Jan 2013 18:43:41 +0100 Subject: can: gw: make routing to the incoming CAN interface configurable Introduce new configuration flag CGW_FLAGS_CAN_IIF_TX_OK to configure if a CAN sk_buff that has been routed with can-gw is allowed to be send back to the originating CAN interface. Signed-off-by: Oliver Hartkopp Signed-off-by: Marc Kleine-Budde --- include/uapi/linux/can/gw.h | 1 + net/can/gw.c | 8 ++++++++ 2 files changed, 9 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/can/gw.h b/include/uapi/linux/can/gw.h index 8e1db18c3cb6..0505c7f86213 100644 --- a/include/uapi/linux/can/gw.h +++ b/include/uapi/linux/can/gw.h @@ -51,6 +51,7 @@ enum { #define CGW_FLAGS_CAN_ECHO 0x01 #define CGW_FLAGS_CAN_SRC_TSTAMP 0x02 +#define CGW_FLAGS_CAN_IIF_TX_OK 0x04 #define CGW_MOD_FUNCS 4 /* AND OR XOR SET */ diff --git a/net/can/gw.c b/net/can/gw.c index 574dda78eb0f..37a3efb7cc9d 100644 --- a/net/can/gw.c +++ b/net/can/gw.c @@ -52,6 +52,7 @@ #include #include #include +#include #include #include #include @@ -347,6 +348,13 @@ static void can_can_gw_rcv(struct sk_buff *skb, void *data) return; } + /* is sending the skb back to the incoming interface not allowed? */ + if (!(gwj->flags & CGW_FLAGS_CAN_IIF_TX_OK) && + skb_headroom(skb) == sizeof(struct can_skb_priv) && + (((struct can_skb_priv *)(skb->head))->ifindex == + gwj->dst.dev->ifindex)) + return; + /* * clone the given skb, which has not been done in can_rcv() * -- cgit v1.2.3 From e6afa00a1409bc3bceed9ccb33111519463dfe7b Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Thu, 17 Jan 2013 18:43:46 +0100 Subject: can: gw: indicate and count deleted frames due to misconfiguration Add a statistic counter to detect deleted frames due to misconfiguration with a new read-only CGW_DELETED netlink attribute for the CAN gateway. Signed-off-by: Oliver Hartkopp Signed-off-by: Marc Kleine-Budde --- include/uapi/linux/can/gw.h | 1 + net/can/gw.c | 12 +++++++++++- 2 files changed, 12 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/can/gw.h b/include/uapi/linux/can/gw.h index 0505c7f86213..ae07bec74f4b 100644 --- a/include/uapi/linux/can/gw.h +++ b/include/uapi/linux/can/gw.h @@ -44,6 +44,7 @@ enum { CGW_SRC_IF, /* ifindex of source network interface */ CGW_DST_IF, /* ifindex of destination network interface */ CGW_FILTER, /* specify struct can_filter on source CAN device */ + CGW_DELETED, /* number of deleted CAN frames (see max_hops param) */ __CGW_MAX }; diff --git a/net/can/gw.c b/net/can/gw.c index 4216a80618cb..acdd4656cc3b 100644 --- a/net/can/gw.c +++ b/net/can/gw.c @@ -131,6 +131,7 @@ struct cgw_job { struct rcu_head rcu; u32 handled_frames; u32 dropped_frames; + u32 deleted_frames; struct cf_mod mod; union { /* CAN frame data source */ @@ -367,8 +368,11 @@ static void can_can_gw_rcv(struct sk_buff *skb, void *data) BUG_ON(skb->ip_summed != CHECKSUM_UNNECESSARY); - if (cgw_hops(skb) >= max_hops) + if (cgw_hops(skb) >= max_hops) { + /* indicate deleted frames due to misconfiguration */ + gwj->deleted_frames++; return; + } if (!(gwj->dst.dev->flags & IFF_UP)) { gwj->dropped_frames++; @@ -500,6 +504,11 @@ static int cgw_put_job(struct sk_buff *skb, struct cgw_job *gwj, int type, goto cancel; } + if (gwj->deleted_frames) { + if (nla_put_u32(skb, CGW_DELETED, gwj->deleted_frames) < 0) + goto cancel; + } + /* check non default settings of attributes */ if (gwj->mod.modtype.and) { @@ -799,6 +808,7 @@ static int cgw_create_job(struct sk_buff *skb, struct nlmsghdr *nlh, gwj->handled_frames = 0; gwj->dropped_frames = 0; + gwj->deleted_frames = 0; gwj->flags = r->flags; gwj->gwtype = r->gwtype; -- cgit v1.2.3 From cd8f7cb4e6dfa4ea08fc250a814240b883ef7911 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 22 Jan 2013 12:34:29 +0100 Subject: cfg80211/mac80211: support reporting wakeup reason When waking up from WoWLAN, it is useful to know what triggered the wakeup. Support reporting the wakeup reason(s) in cfg80211 (and a pass-through in mac80211) to allow userspace to know. Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 41 +++++++++++++++++++ include/net/mac80211.h | 12 ++++++ include/uapi/linux/nl80211.h | 31 ++++++++++++++ net/mac80211/pm.c | 10 +++++ net/wireless/nl80211.c | 97 ++++++++++++++++++++++++++++++++++++++++++++ net/wireless/trace.h | 35 ++++++++++++++++ 6 files changed, 226 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 36e076e374d2..48add7e3ba1d 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1596,6 +1596,32 @@ struct cfg80211_wowlan { int n_patterns; }; +/** + * struct cfg80211_wowlan_wakeup - wakeup report + * @disconnect: woke up by getting disconnected + * @magic_pkt: woke up by receiving magic packet + * @gtk_rekey_failure: woke up by GTK rekey failure + * @eap_identity_req: woke up by EAP identity request packet + * @four_way_handshake: woke up by 4-way handshake + * @rfkill_release: woke up by rfkill being released + * @pattern_idx: pattern that caused wakeup, -1 if not due to pattern + * @packet_present_len: copied wakeup packet data + * @packet_len: original wakeup packet length + * @packet: The packet causing the wakeup, if any. + * @packet_80211: For pattern match, magic packet and other data + * frame triggers an 802.3 frame should be reported, for + * disconnect due to deauth 802.11 frame. This indicates which + * it is. + */ +struct cfg80211_wowlan_wakeup { + bool disconnect, magic_pkt, gtk_rekey_failure, + eap_identity_req, four_way_handshake, + rfkill_release, packet_80211; + s32 pattern_idx; + u32 packet_present_len, packet_len; + const void *packet; +}; + /** * struct cfg80211_gtk_rekey_data - rekey data * @kek: key encryption key @@ -3852,6 +3878,21 @@ int cfg80211_get_p2p_attr(const u8 *ies, unsigned int len, enum ieee80211_p2p_attr_id attr, u8 *buf, unsigned int bufsize); +/** + * cfg80211_report_wowlan_wakeup - report wakeup from WoWLAN + * @wdev: the wireless device reporting the wakeup + * @wakeup: the wakeup report + * @gfp: allocation flags + * + * This function reports that the given device woke up. If it + * caused the wakeup, report the reason(s), otherwise you may + * pass %NULL as the @wakeup parameter to advertise that something + * else caused the wakeup. + */ +void cfg80211_report_wowlan_wakeup(struct wireless_dev *wdev, + struct cfg80211_wowlan_wakeup *wakeup, + gfp_t gfp); + /* Logging, debugging and troubleshooting/diagnostic helpers. */ /* wiphy_printk helpers, similar to dev_printk */ diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 21831ee57e3c..7a27e00c513a 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -4206,4 +4206,16 @@ void ieee80211_disable_rssi_reports(struct ieee80211_vif *vif); */ int ieee80211_ave_rssi(struct ieee80211_vif *vif); +/** + * ieee80211_report_wowlan_wakeup - report WoWLAN wakeup + * @vif: virtual interface + * @wakeup: wakeup reason(s) + * @gfp: allocation flags + * + * See cfg80211_report_wowlan_wakeup(). + */ +void ieee80211_report_wowlan_wakeup(struct ieee80211_vif *vif, + struct cfg80211_wowlan_wakeup *wakeup, + gfp_t gfp); + #endif /* MAC80211_H */ diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 5b7dbc1ea966..225a65e72219 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -513,6 +513,12 @@ * command with the %NL80211_ATTR_WOWLAN_TRIGGERS attribute. For * more background information, see * http://wireless.kernel.org/en/users/Documentation/WoWLAN. + * The @NL80211_CMD_SET_WOWLAN command can also be used as a notification + * from the driver reporting the wakeup reason. In this case, the + * @NL80211_ATTR_WOWLAN_TRIGGERS attribute will contain the reason + * for the wakeup, if it was caused by wireless. If it is not present + * in the wakeup notification, the wireless device didn't cause the + * wakeup but reports that it was woken up. * * @NL80211_CMD_SET_REKEY_OFFLOAD: This command is used give the driver * the necessary information for supporting GTK rekey offload. This @@ -2947,6 +2953,10 @@ struct nl80211_wowlan_pattern_support { * * In %NL80211_ATTR_WOWLAN_TRIGGERS_SUPPORTED, it is a binary attribute * carrying a &struct nl80211_wowlan_pattern_support. + * + * When reporting wakeup. it is a u32 attribute containing the 0-based + * index of the pattern that caused the wakeup, in the patterns passed + * to the kernel when configuring. * @NL80211_WOWLAN_TRIG_GTK_REKEY_SUPPORTED: Not a real trigger, and cannot be * used when setting, used only to indicate that GTK rekeying is supported * by the device (flag) @@ -2957,8 +2967,25 @@ struct nl80211_wowlan_pattern_support { * @NL80211_WOWLAN_TRIG_4WAY_HANDSHAKE: wake up on 4-way handshake (flag) * @NL80211_WOWLAN_TRIG_RFKILL_RELEASE: wake up when rfkill is released * (on devices that have rfkill in the device) (flag) + * @NL80211_WOWLAN_TRIG_WAKEUP_PKT_80211: For wakeup reporting only, contains + * the 802.11 packet that caused the wakeup, e.g. a deauth frame. The frame + * may be truncated, the @NL80211_WOWLAN_TRIG_WAKEUP_PKT_80211_LEN + * attribute contains the original length. + * @NL80211_WOWLAN_TRIG_WAKEUP_PKT_80211_LEN: Original length of the 802.11 + * packet, may be bigger than the @NL80211_WOWLAN_TRIG_WAKEUP_PKT_80211 + * attribute if the packet was truncated somewhere. + * @NL80211_WOWLAN_TRIG_WAKEUP_PKT_8023: For wakeup reporting only, contains the + * 802.11 packet that caused the wakeup, e.g. a magic packet. The frame may + * be truncated, the @NL80211_WOWLAN_TRIG_WAKEUP_PKT_8023_LEN attribute + * contains the original length. + * @NL80211_WOWLAN_TRIG_WAKEUP_PKT_8023_LEN: Original length of the 802.3 + * packet, may be bigger than the @NL80211_WOWLAN_TRIG_WAKEUP_PKT_8023 + * attribute if the packet was truncated somewhere. * @NUM_NL80211_WOWLAN_TRIG: number of wake on wireless triggers * @MAX_NL80211_WOWLAN_TRIG: highest wowlan trigger attribute number + * + * These nested attributes are used to configure the wakeup triggers and + * to report the wakeup reason(s). */ enum nl80211_wowlan_triggers { __NL80211_WOWLAN_TRIG_INVALID, @@ -2971,6 +2998,10 @@ enum nl80211_wowlan_triggers { NL80211_WOWLAN_TRIG_EAP_IDENT_REQUEST, NL80211_WOWLAN_TRIG_4WAY_HANDSHAKE, NL80211_WOWLAN_TRIG_RFKILL_RELEASE, + NL80211_WOWLAN_TRIG_WAKEUP_PKT_80211, + NL80211_WOWLAN_TRIG_WAKEUP_PKT_80211_LEN, + NL80211_WOWLAN_TRIG_WAKEUP_PKT_8023, + NL80211_WOWLAN_TRIG_WAKEUP_PKT_8023_LEN, /* keep last */ NUM_NL80211_WOWLAN_TRIG, diff --git a/net/mac80211/pm.c b/net/mac80211/pm.c index e45b83610e85..53801d20176d 100644 --- a/net/mac80211/pm.c +++ b/net/mac80211/pm.c @@ -228,3 +228,13 @@ int __ieee80211_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan) * ieee80211_reconfig(), which is also needed for hardware * hang/firmware failure/etc. recovery. */ + +void ieee80211_report_wowlan_wakeup(struct ieee80211_vif *vif, + struct cfg80211_wowlan_wakeup *wakeup, + gfp_t gfp) +{ + struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); + + cfg80211_report_wowlan_wakeup(&sdata->wdev, wakeup, gfp); +} +EXPORT_SYMBOL(ieee80211_report_wowlan_wakeup); diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index b5978ab4ad7a..d359734b6972 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -9323,6 +9323,103 @@ void cfg80211_report_obss_beacon(struct wiphy *wiphy, } EXPORT_SYMBOL(cfg80211_report_obss_beacon); +#ifdef CONFIG_PM +void cfg80211_report_wowlan_wakeup(struct wireless_dev *wdev, + struct cfg80211_wowlan_wakeup *wakeup, + gfp_t gfp) +{ + struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + struct sk_buff *msg; + void *hdr; + int err, size = 200; + + trace_cfg80211_report_wowlan_wakeup(wdev->wiphy, wdev, wakeup); + + if (wakeup) + size += wakeup->packet_present_len; + + msg = nlmsg_new(size, gfp); + if (!msg) + return; + + hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_SET_WOWLAN); + if (!hdr) + goto free_msg; + + if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) || + nla_put_u64(msg, NL80211_ATTR_WDEV, wdev_id(wdev))) + goto free_msg; + + if (wdev->netdev && nla_put_u32(msg, NL80211_ATTR_IFINDEX, + wdev->netdev->ifindex)) + goto free_msg; + + if (wakeup) { + struct nlattr *reasons; + + reasons = nla_nest_start(msg, NL80211_ATTR_WOWLAN_TRIGGERS); + + if (wakeup->disconnect && + nla_put_flag(msg, NL80211_WOWLAN_TRIG_DISCONNECT)) + goto free_msg; + if (wakeup->magic_pkt && + nla_put_flag(msg, NL80211_WOWLAN_TRIG_MAGIC_PKT)) + goto free_msg; + if (wakeup->gtk_rekey_failure && + nla_put_flag(msg, NL80211_WOWLAN_TRIG_GTK_REKEY_FAILURE)) + goto free_msg; + if (wakeup->eap_identity_req && + nla_put_flag(msg, NL80211_WOWLAN_TRIG_EAP_IDENT_REQUEST)) + goto free_msg; + if (wakeup->four_way_handshake && + nla_put_flag(msg, NL80211_WOWLAN_TRIG_4WAY_HANDSHAKE)) + goto free_msg; + if (wakeup->rfkill_release && + nla_put_flag(msg, NL80211_WOWLAN_TRIG_RFKILL_RELEASE)) + goto free_msg; + + if (wakeup->pattern_idx >= 0 && + nla_put_u32(msg, NL80211_WOWLAN_TRIG_PKT_PATTERN, + wakeup->pattern_idx)) + goto free_msg; + + if (wakeup->packet) { + u32 pkt_attr = NL80211_WOWLAN_TRIG_WAKEUP_PKT_80211; + u32 len_attr = NL80211_WOWLAN_TRIG_WAKEUP_PKT_80211_LEN; + + if (!wakeup->packet_80211) { + pkt_attr = + NL80211_WOWLAN_TRIG_WAKEUP_PKT_8023; + len_attr = + NL80211_WOWLAN_TRIG_WAKEUP_PKT_8023_LEN; + } + + if (wakeup->packet_len && + nla_put_u32(msg, len_attr, wakeup->packet_len)) + goto free_msg; + + if (nla_put(msg, pkt_attr, wakeup->packet_present_len, + wakeup->packet)) + goto free_msg; + } + + nla_nest_end(msg, reasons); + } + + err = genlmsg_end(msg, hdr); + if (err < 0) + goto free_msg; + + genlmsg_multicast_netns(wiphy_net(&rdev->wiphy), msg, 0, + nl80211_mlme_mcgrp.id, gfp); + return; + + free_msg: + nlmsg_free(msg); +} +EXPORT_SYMBOL(cfg80211_report_wowlan_wakeup); +#endif + void cfg80211_tdls_oper_request(struct net_device *dev, const u8 *peer, enum nl80211_tdls_operation oper, u16 reason_code, gfp_t gfp) diff --git a/net/wireless/trace.h b/net/wireless/trace.h index 8bc553199686..c9cafb0ea95f 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -2333,6 +2333,41 @@ TRACE_EVENT(cfg80211_return_u32, TP_printk("ret: %u", __entry->ret) ); +TRACE_EVENT(cfg80211_report_wowlan_wakeup, + TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev, + struct cfg80211_wowlan_wakeup *wakeup), + TP_ARGS(wiphy, wdev, wakeup), + TP_STRUCT__entry( + WIPHY_ENTRY + WDEV_ENTRY + __field(bool, disconnect) + __field(bool, magic_pkt) + __field(bool, gtk_rekey_failure) + __field(bool, eap_identity_req) + __field(bool, four_way_handshake) + __field(bool, rfkill_release) + __field(s32, pattern_idx) + __field(u32, packet_len) + __dynamic_array(u8, packet, wakeup->packet_present_len) + ), + TP_fast_assign( + WIPHY_ASSIGN; + WDEV_ASSIGN; + __entry->disconnect = wakeup->disconnect; + __entry->magic_pkt = wakeup->magic_pkt; + __entry->gtk_rekey_failure = wakeup->gtk_rekey_failure; + __entry->eap_identity_req = wakeup->eap_identity_req; + __entry->four_way_handshake = wakeup->four_way_handshake; + __entry->rfkill_release = wakeup->rfkill_release; + __entry->pattern_idx = wakeup->pattern_idx; + __entry->packet_len = wakeup->packet_len; + if (wakeup->packet && wakeup->packet_present_len) + memcpy(__get_dynamic_array(packet), wakeup->packet, + wakeup->packet_present_len); + ), + TP_printk(WIPHY_PR_FMT ", " WDEV_PR_FMT, WIPHY_PR_ARG, WDEV_PR_ARG) +); + #endif /* !__RDEV_OPS_TRACE || TRACE_HEADER_MULTI_READ */ #undef TRACE_INCLUDE_PATH -- cgit v1.2.3 From 23c153e54197171f30b889d9654929d74b6599d5 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Thu, 31 Jan 2013 17:08:11 +0100 Subject: fuse: bump version for READDIRPLUS Yeah, we have a capability flag for this as well, so this is not strictly necessary, but it doesn't hurt either. Signed-off-by: Miklos Szeredi --- include/uapi/linux/fuse.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h index 5dc1fea49ecd..3451b6061e69 100644 --- a/include/uapi/linux/fuse.h +++ b/include/uapi/linux/fuse.h @@ -60,6 +60,9 @@ * * 7.20 * - add FUSE_AUTO_INVAL_DATA + * + * 7.21 + * - add FUSE_READDIRPLUS */ #ifndef _LINUX_FUSE_H @@ -91,7 +94,7 @@ #define FUSE_KERNEL_VERSION 7 /** Minor version number of this interface */ -#define FUSE_KERNEL_MINOR_VERSION 20 +#define FUSE_KERNEL_MINOR_VERSION 21 /** The node ID of the root inode */ #define FUSE_ROOT_ID 1 -- cgit v1.2.3 From 6fcdf4facb85e7d54ff6195378dd2ba8e0baccc4 Mon Sep 17 00:00:00 2001 From: Paul Gortmaker Date: Wed, 30 Jan 2013 21:50:08 -0500 Subject: wanrouter: delete now orphaned header content, files/drivers The wanrouter support was identified earlier as unused for years, and so the previous commit totally decoupled it from the kernel, leaving the related wanrouter files present, but totally inert. Here we take the final step in that cleanup, by doing a wholesale removal of these files. The two step process is used so that the large deletion is decoupled from the git history of files that we still care about. The drivers deleted here all were dependent on the Kconfig setting CONFIG_WAN_ROUTER_DRIVERS. A stub wanrouter.h header (kernel & uapi) are left behind so that drivers/isdn/i4l/isdn_x25iface.c continues to compile, and so that we don't accidentally break userspace that expected these defines. Cc: Joe Perches Cc: Dan Carpenter Cc: Arnaldo Carvalho de Melo Signed-off-by: Paul Gortmaker --- drivers/net/wan/cycx_drv.c | 569 -------------- drivers/net/wan/cycx_main.c | 346 --------- drivers/net/wan/cycx_x25.c | 1602 ---------------------------------------- include/linux/cyclomx.h | 77 -- include/linux/cycx_drv.h | 64 -- include/linux/wanrouter.h | 127 +--- include/uapi/linux/wanrouter.h | 443 +---------- net/wanrouter/Kconfig | 27 - net/wanrouter/Makefile | 7 - net/wanrouter/patchlevel | 1 - net/wanrouter/wanmain.c | 782 -------------------- net/wanrouter/wanproc.c | 380 ---------- 12 files changed, 8 insertions(+), 4417 deletions(-) delete mode 100644 drivers/net/wan/cycx_drv.c delete mode 100644 drivers/net/wan/cycx_main.c delete mode 100644 drivers/net/wan/cycx_x25.c delete mode 100644 include/linux/cyclomx.h delete mode 100644 include/linux/cycx_drv.h delete mode 100644 net/wanrouter/Kconfig delete mode 100644 net/wanrouter/Makefile delete mode 100644 net/wanrouter/patchlevel delete mode 100644 net/wanrouter/wanmain.c delete mode 100644 net/wanrouter/wanproc.c (limited to 'include/uapi/linux') diff --git a/drivers/net/wan/cycx_drv.c b/drivers/net/wan/cycx_drv.c deleted file mode 100644 index 2a3ecae67a90..000000000000 --- a/drivers/net/wan/cycx_drv.c +++ /dev/null @@ -1,569 +0,0 @@ -/* -* cycx_drv.c Cyclom 2X Support Module. -* -* This module is a library of common hardware specific -* functions used by the Cyclades Cyclom 2X sync card. -* -* Author: Arnaldo Carvalho de Melo -* -* Copyright: (c) 1998-2003 Arnaldo Carvalho de Melo -* -* Based on sdladrv.c by Gene Kozin -* -* This program is free software; you can redistribute it and/or -* modify it under the terms of the GNU General Public License -* as published by the Free Software Foundation; either version -* 2 of the License, or (at your option) any later version. -* ============================================================================ -* 1999/11/11 acme set_current_state(TASK_INTERRUPTIBLE), code -* cleanup -* 1999/11/08 acme init_cyc2x deleted, doing nothing -* 1999/11/06 acme back to read[bw], write[bw] and memcpy_to and -* fromio to use dpmbase ioremaped -* 1999/10/26 acme use isa_read[bw], isa_write[bw] & isa_memcpy_to -* & fromio -* 1999/10/23 acme cleanup to only supports cyclom2x: all the other -* boards are no longer manufactured by cyclades, -* if someone wants to support them... be my guest! -* 1999/05/28 acme cycx_intack & cycx_intde gone for good -* 1999/05/18 acme lots of unlogged work, submitting to Linus... -* 1999/01/03 acme more judicious use of data types -* 1999/01/03 acme judicious use of data types :> -* cycx_inten trying to reset pending interrupts -* from cyclom 2x - I think this isn't the way to -* go, but for now... -* 1999/01/02 acme cycx_intack ok, I think there's nothing to do -* to ack an int in cycx_drv.c, only handle it in -* cyx_isr (or in the other protocols: cyp_isr, -* cyf_isr, when they get implemented. -* Dec 31, 1998 acme cycx_data_boot & cycx_code_boot fixed, crossing -* fingers to see x25_configure in cycx_x25.c -* work... :) -* Dec 26, 1998 acme load implementation fixed, seems to work! :) -* cycx_2x_dpmbase_options with all the possible -* DPM addresses (20). -* cycx_intr implemented (test this!) -* general code cleanup -* Dec 8, 1998 Ivan Passos Cyclom-2X firmware load implementation. -* Aug 8, 1998 acme Initial version. -*/ - -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - -#include /* __init */ -#include -#include /* printk(), and other useful stuff */ -#include /* offsetof(), etc. */ -#include /* return codes */ -#include /* API definitions */ -#include /* CYCX firmware module definitions */ -#include /* udelay, msleep_interruptible */ -#include /* read[wl], write[wl], ioremap, iounmap */ - -#define MOD_VERSION 0 -#define MOD_RELEASE 6 - -MODULE_AUTHOR("Arnaldo Carvalho de Melo"); -MODULE_DESCRIPTION("Cyclom 2x Sync Card Driver"); -MODULE_LICENSE("GPL"); - -/* Hardware-specific functions */ -static int load_cyc2x(struct cycx_hw *hw, struct cycx_firmware *cfm, u32 len); -static void cycx_bootcfg(struct cycx_hw *hw); - -static int reset_cyc2x(void __iomem *addr); -static int detect_cyc2x(void __iomem *addr); - -/* Miscellaneous functions */ -static int get_option_index(const long *optlist, long optval); -static u16 checksum(u8 *buf, u32 len); - -#define wait_cyc(addr) cycx_exec(addr + CMD_OFFSET) - -/* Global Data */ - -/* private data */ -static const char fullname[] = "Cyclom 2X Support Module"; -static const char copyright[] = - "(c) 1998-2003 Arnaldo Carvalho de Melo "; - -/* Hardware configuration options. - * These are arrays of configuration options used by verification routines. - * The first element of each array is its size (i.e. number of options). - */ -static const long cyc2x_dpmbase_options[] = { - 20, - 0xA0000, 0xA4000, 0xA8000, 0xAC000, 0xB0000, 0xB4000, 0xB8000, - 0xBC000, 0xC0000, 0xC4000, 0xC8000, 0xCC000, 0xD0000, 0xD4000, - 0xD8000, 0xDC000, 0xE0000, 0xE4000, 0xE8000, 0xEC000 -}; - -static const long cycx_2x_irq_options[] = { 7, 3, 5, 9, 10, 11, 12, 15 }; - -/* Kernel Loadable Module Entry Points */ -/* Module 'insert' entry point. - * o print announcement - * o initialize static data - * - * Return: 0 Ok - * < 0 error. - * Context: process */ - -static int __init cycx_drv_init(void) -{ - pr_info("%s v%u.%u %s\n", - fullname, MOD_VERSION, MOD_RELEASE, copyright); - - return 0; -} - -/* Module 'remove' entry point. - * o release all remaining system resources */ -static void cycx_drv_cleanup(void) -{ -} - -/* Kernel APIs */ -/* Set up adapter. - * o detect adapter type - * o verify hardware configuration options - * o check for hardware conflicts - * o set up adapter shared memory - * o test adapter memory - * o load firmware - * Return: 0 ok. - * < 0 error */ -EXPORT_SYMBOL(cycx_setup); -int cycx_setup(struct cycx_hw *hw, void *cfm, u32 len, unsigned long dpmbase) -{ - int err; - - /* Verify IRQ configuration options */ - if (!get_option_index(cycx_2x_irq_options, hw->irq)) { - pr_err("IRQ %d is invalid!\n", hw->irq); - return -EINVAL; - } - - /* Setup adapter dual-port memory window and test memory */ - if (!dpmbase) { - pr_err("you must specify the dpm address!\n"); - return -EINVAL; - } else if (!get_option_index(cyc2x_dpmbase_options, dpmbase)) { - pr_err("memory address 0x%lX is invalid!\n", dpmbase); - return -EINVAL; - } - - hw->dpmbase = ioremap(dpmbase, CYCX_WINDOWSIZE); - hw->dpmsize = CYCX_WINDOWSIZE; - - if (!detect_cyc2x(hw->dpmbase)) { - pr_err("adapter Cyclom 2X not found at address 0x%lX!\n", - dpmbase); - return -EINVAL; - } - - pr_info("found Cyclom 2X card at address 0x%lX\n", dpmbase); - - /* Load firmware. If loader fails then shut down adapter */ - err = load_cyc2x(hw, cfm, len); - - if (err) - cycx_down(hw); /* shutdown adapter */ - - return err; -} - -EXPORT_SYMBOL(cycx_down); -int cycx_down(struct cycx_hw *hw) -{ - iounmap(hw->dpmbase); - return 0; -} - -/* Enable interrupt generation. */ -static void cycx_inten(struct cycx_hw *hw) -{ - writeb(0, hw->dpmbase); -} - -/* Generate an interrupt to adapter's CPU. */ -EXPORT_SYMBOL(cycx_intr); -void cycx_intr(struct cycx_hw *hw) -{ - writew(0, hw->dpmbase + GEN_CYCX_INTR); -} - -/* Execute Adapter Command. - * o Set exec flag. - * o Busy-wait until flag is reset. */ -EXPORT_SYMBOL(cycx_exec); -int cycx_exec(void __iomem *addr) -{ - u16 i = 0; - /* wait till addr content is zeroed */ - - while (readw(addr)) { - udelay(1000); - - if (++i > 50) - return -1; - } - - return 0; -} - -/* Read absolute adapter memory. - * Transfer data from adapter's memory to data buffer. */ -EXPORT_SYMBOL(cycx_peek); -int cycx_peek(struct cycx_hw *hw, u32 addr, void *buf, u32 len) -{ - if (len == 1) - *(u8*)buf = readb(hw->dpmbase + addr); - else - memcpy_fromio(buf, hw->dpmbase + addr, len); - - return 0; -} - -/* Write Absolute Adapter Memory. - * Transfer data from data buffer to adapter's memory. */ -EXPORT_SYMBOL(cycx_poke); -int cycx_poke(struct cycx_hw *hw, u32 addr, void *buf, u32 len) -{ - if (len == 1) - writeb(*(u8*)buf, hw->dpmbase + addr); - else - memcpy_toio(hw->dpmbase + addr, buf, len); - - return 0; -} - -/* Hardware-Specific Functions */ - -/* Load Aux Routines */ -/* Reset board hardware. - return 1 if memory exists at addr and 0 if not. */ -static int memory_exists(void __iomem *addr) -{ - int tries = 0; - - for (; tries < 3 ; tries++) { - writew(TEST_PATTERN, addr + 0x10); - - if (readw(addr + 0x10) == TEST_PATTERN) - if (readw(addr + 0x10) == TEST_PATTERN) - return 1; - - msleep_interruptible(1 * 1000); - } - - return 0; -} - -/* Load reset code. */ -static void reset_load(void __iomem *addr, u8 *buffer, u32 cnt) -{ - void __iomem *pt_code = addr + RESET_OFFSET; - u16 i; /*, j; */ - - for (i = 0 ; i < cnt ; i++) { -/* for (j = 0 ; j < 50 ; j++); Delay - FIXME busy waiting... */ - writeb(*buffer++, pt_code++); - } -} - -/* Load buffer using boot interface. - * o copy data from buffer to Cyclom-X memory - * o wait for reset code to copy it to right portion of memory */ -static int buffer_load(void __iomem *addr, u8 *buffer, u32 cnt) -{ - memcpy_toio(addr + DATA_OFFSET, buffer, cnt); - writew(GEN_BOOT_DAT, addr + CMD_OFFSET); - - return wait_cyc(addr); -} - -/* Set up entry point and kick start Cyclom-X CPU. */ -static void cycx_start(void __iomem *addr) -{ - /* put in 0x30 offset the jump instruction to the code entry point */ - writeb(0xea, addr + 0x30); - writeb(0x00, addr + 0x31); - writeb(0xc4, addr + 0x32); - writeb(0x00, addr + 0x33); - writeb(0x00, addr + 0x34); - - /* cmd to start executing code */ - writew(GEN_START, addr + CMD_OFFSET); -} - -/* Load and boot reset code. */ -static void cycx_reset_boot(void __iomem *addr, u8 *code, u32 len) -{ - void __iomem *pt_start = addr + START_OFFSET; - - writeb(0xea, pt_start++); /* jmp to f000:3f00 */ - writeb(0x00, pt_start++); - writeb(0xfc, pt_start++); - writeb(0x00, pt_start++); - writeb(0xf0, pt_start); - reset_load(addr, code, len); - - /* 80186 was in hold, go */ - writeb(0, addr + START_CPU); - msleep_interruptible(1 * 1000); -} - -/* Load data.bin file through boot (reset) interface. */ -static int cycx_data_boot(void __iomem *addr, u8 *code, u32 len) -{ - void __iomem *pt_boot_cmd = addr + CMD_OFFSET; - u32 i; - - /* boot buffer length */ - writew(CFM_LOAD_BUFSZ, pt_boot_cmd + sizeof(u16)); - writew(GEN_DEFPAR, pt_boot_cmd); - - if (wait_cyc(addr) < 0) - return -1; - - writew(0, pt_boot_cmd + sizeof(u16)); - writew(0x4000, pt_boot_cmd + 2 * sizeof(u16)); - writew(GEN_SET_SEG, pt_boot_cmd); - - if (wait_cyc(addr) < 0) - return -1; - - for (i = 0 ; i < len ; i += CFM_LOAD_BUFSZ) - if (buffer_load(addr, code + i, - min_t(u32, CFM_LOAD_BUFSZ, (len - i))) < 0) { - pr_err("Error !!\n"); - return -1; - } - - return 0; -} - - -/* Load code.bin file through boot (reset) interface. */ -static int cycx_code_boot(void __iomem *addr, u8 *code, u32 len) -{ - void __iomem *pt_boot_cmd = addr + CMD_OFFSET; - u32 i; - - /* boot buffer length */ - writew(CFM_LOAD_BUFSZ, pt_boot_cmd + sizeof(u16)); - writew(GEN_DEFPAR, pt_boot_cmd); - - if (wait_cyc(addr) < 0) - return -1; - - writew(0x0000, pt_boot_cmd + sizeof(u16)); - writew(0xc400, pt_boot_cmd + 2 * sizeof(u16)); - writew(GEN_SET_SEG, pt_boot_cmd); - - if (wait_cyc(addr) < 0) - return -1; - - for (i = 0 ; i < len ; i += CFM_LOAD_BUFSZ) - if (buffer_load(addr, code + i, - min_t(u32, CFM_LOAD_BUFSZ, (len - i)))) { - pr_err("Error !!\n"); - return -1; - } - - return 0; -} - -/* Load adapter from the memory image of the CYCX firmware module. - * o verify firmware integrity and compatibility - * o start adapter up */ -static int load_cyc2x(struct cycx_hw *hw, struct cycx_firmware *cfm, u32 len) -{ - int i, j; - struct cycx_fw_header *img_hdr; - u8 *reset_image, - *data_image, - *code_image; - void __iomem *pt_cycld = hw->dpmbase + 0x400; - u16 cksum; - - /* Announce */ - pr_info("firmware signature=\"%s\"\n", cfm->signature); - - /* Verify firmware signature */ - if (strcmp(cfm->signature, CFM_SIGNATURE)) { - pr_err("load_cyc2x: not Cyclom-2X firmware!\n"); - return -EINVAL; - } - - pr_info("firmware version=%u\n", cfm->version); - - /* Verify firmware module format version */ - if (cfm->version != CFM_VERSION) { - pr_err("%s: firmware format %u rejected! Expecting %u.\n", - __func__, cfm->version, CFM_VERSION); - return -EINVAL; - } - - /* Verify firmware module length and checksum */ - cksum = checksum((u8*)&cfm->info, sizeof(struct cycx_fw_info) + - cfm->info.codesize); -/* - FIXME cfm->info.codesize is off by 2 - if (((len - sizeof(struct cycx_firmware) - 1) != cfm->info.codesize) || -*/ - if (cksum != cfm->checksum) { - pr_err("%s: firmware corrupted!\n", __func__); - pr_err(" cdsize = 0x%x (expected 0x%lx)\n", - len - (int)sizeof(struct cycx_firmware) - 1, - cfm->info.codesize); - pr_err(" chksum = 0x%x (expected 0x%x)\n", - cksum, cfm->checksum); - return -EINVAL; - } - - /* If everything is ok, set reset, data and code pointers */ - img_hdr = (struct cycx_fw_header *)&cfm->image; -#ifdef FIRMWARE_DEBUG - pr_info("%s: image sizes\n", __func__); - pr_info(" reset=%lu\n", img_hdr->reset_size); - pr_info(" data=%lu\n", img_hdr->data_size); - pr_info(" code=%lu\n", img_hdr->code_size); -#endif - reset_image = ((u8 *)img_hdr) + sizeof(struct cycx_fw_header); - data_image = reset_image + img_hdr->reset_size; - code_image = data_image + img_hdr->data_size; - - /*---- Start load ----*/ - /* Announce */ - pr_info("loading firmware %s (ID=%u)...\n", - cfm->descr[0] ? cfm->descr : "unknown firmware", - cfm->info.codeid); - - for (i = 0 ; i < 5 ; i++) { - /* Reset Cyclom hardware */ - if (!reset_cyc2x(hw->dpmbase)) { - pr_err("dpm problem or board not found\n"); - return -EINVAL; - } - - /* Load reset.bin */ - cycx_reset_boot(hw->dpmbase, reset_image, img_hdr->reset_size); - /* reset is waiting for boot */ - writew(GEN_POWER_ON, pt_cycld); - msleep_interruptible(1 * 1000); - - for (j = 0 ; j < 3 ; j++) - if (!readw(pt_cycld)) - goto reset_loaded; - else - msleep_interruptible(1 * 1000); - } - - pr_err("reset not started\n"); - return -EINVAL; - -reset_loaded: - /* Load data.bin */ - if (cycx_data_boot(hw->dpmbase, data_image, img_hdr->data_size)) { - pr_err("cannot load data file\n"); - return -EINVAL; - } - - /* Load code.bin */ - if (cycx_code_boot(hw->dpmbase, code_image, img_hdr->code_size)) { - pr_err("cannot load code file\n"); - return -EINVAL; - } - - /* Prepare boot-time configuration data */ - cycx_bootcfg(hw); - - /* kick-off CPU */ - cycx_start(hw->dpmbase); - - /* Arthur Ganzert's tip: wait a while after the firmware loading... - seg abr 26 17:17:12 EST 1999 - acme */ - msleep_interruptible(7 * 1000); - pr_info("firmware loaded!\n"); - - /* enable interrupts */ - cycx_inten(hw); - - return 0; -} - -/* Prepare boot-time firmware configuration data. - * o initialize configuration data area - From async.doc - V_3.4.0 - 07/18/1994 - - As of now, only static buffers are available to the user. - So, the bit VD_RXDIRC must be set in 'valid'. That means that user - wants to use the static transmission and reception buffers. */ -static void cycx_bootcfg(struct cycx_hw *hw) -{ - /* use fixed buffers */ - writeb(FIXED_BUFFERS, hw->dpmbase + CONF_OFFSET); -} - -/* Detect Cyclom 2x adapter. - * Following tests are used to detect Cyclom 2x adapter: - * to be completed based on the tests done below - * Return 1 if detected o.k. or 0 if failed. - * Note: This test is destructive! Adapter will be left in shutdown - * state after the test. */ -static int detect_cyc2x(void __iomem *addr) -{ - reset_cyc2x(addr); - - return memory_exists(addr); -} - -/* Miscellaneous */ -/* Get option's index into the options list. - * Return option's index (1 .. N) or zero if option is invalid. */ -static int get_option_index(const long *optlist, long optval) -{ - int i = 1; - - for (; i <= optlist[0]; ++i) - if (optlist[i] == optval) - return i; - - return 0; -} - -/* Reset adapter's CPU. */ -static int reset_cyc2x(void __iomem *addr) -{ - writeb(0, addr + RST_ENABLE); - msleep_interruptible(2 * 1000); - writeb(0, addr + RST_DISABLE); - msleep_interruptible(2 * 1000); - - return memory_exists(addr); -} - -/* Calculate 16-bit CRC using CCITT polynomial. */ -static u16 checksum(u8 *buf, u32 len) -{ - u16 crc = 0; - u16 mask, flag; - - for (; len; --len, ++buf) - for (mask = 0x80; mask; mask >>= 1) { - flag = (crc & 0x8000); - crc <<= 1; - crc |= ((*buf & mask) ? 1 : 0); - - if (flag) - crc ^= 0x1021; - } - - return crc; -} - -module_init(cycx_drv_init); -module_exit(cycx_drv_cleanup); - -/* End */ diff --git a/drivers/net/wan/cycx_main.c b/drivers/net/wan/cycx_main.c deleted file mode 100644 index 81fbbad406be..000000000000 --- a/drivers/net/wan/cycx_main.c +++ /dev/null @@ -1,346 +0,0 @@ -/* -* cycx_main.c Cyclades Cyclom 2X WAN Link Driver. Main module. -* -* Author: Arnaldo Carvalho de Melo -* -* Copyright: (c) 1998-2003 Arnaldo Carvalho de Melo -* -* Based on sdlamain.c by Gene Kozin & -* Jaspreet Singh -* -* This program is free software; you can redistribute it and/or -* modify it under the terms of the GNU General Public License -* as published by the Free Software Foundation; either version -* 2 of the License, or (at your option) any later version. -* ============================================================================ -* Please look at the bitkeeper changelog (or any other scm tool that ends up -* importing bitkeeper changelog or that replaces bitkeeper in the future as -* main tool for linux development). -* -* 2001/05/09 acme Fix MODULE_DESC for debug, .bss nitpicks, -* some cleanups -* 2000/07/13 acme remove useless #ifdef MODULE and crap -* #if KERNEL_VERSION > blah -* 2000/07/06 acme __exit at cyclomx_cleanup -* 2000/04/02 acme dprintk and cycx_debug -* module_init/module_exit -* 2000/01/21 acme rename cyclomx_open to cyclomx_mod_inc_use_count -* and cyclomx_close to cyclomx_mod_dec_use_count -* 2000/01/08 acme cleanup -* 1999/11/06 acme cycx_down back to life (it needs to be -* called to iounmap the dpmbase) -* 1999/08/09 acme removed references to enable_tx_int -* use spinlocks instead of cli/sti in -* cyclomx_set_state -* 1999/05/19 acme works directly linked into the kernel -* init_waitqueue_head for 2.3.* kernel -* 1999/05/18 acme major cleanup (polling not needed), etc -* 1998/08/28 acme minor cleanup (ioctls for firmware deleted) -* queue_task activated -* 1998/08/08 acme Initial version. -*/ - -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - -#include /* offsetof(), etc. */ -#include /* return codes */ -#include /* inline memset(), etc. */ -#include /* kmalloc(), kfree() */ -#include /* printk(), and other useful stuff */ -#include /* support for loadable modules */ -#include /* request_region(), release_region() */ -#include /* WAN router definitions */ -#include /* cyclomx common user API definitions */ -#include /* __init (when not using as a module) */ -#include - -unsigned int cycx_debug; - -MODULE_AUTHOR("Arnaldo Carvalho de Melo"); -MODULE_DESCRIPTION("Cyclom 2X Sync Card Driver."); -MODULE_LICENSE("GPL"); -module_param(cycx_debug, int, 0); -MODULE_PARM_DESC(cycx_debug, "cyclomx debug level"); - -/* Defines & Macros */ - -#define CYCX_DRV_VERSION 0 /* version number */ -#define CYCX_DRV_RELEASE 11 /* release (minor version) number */ -#define CYCX_MAX_CARDS 1 /* max number of adapters */ - -#define CONFIG_CYCX_CARDS 1 - -/* Function Prototypes */ - -/* WAN link driver entry points */ -static int cycx_wan_setup(struct wan_device *wandev, wandev_conf_t *conf); -static int cycx_wan_shutdown(struct wan_device *wandev); - -/* Miscellaneous functions */ -static irqreturn_t cycx_isr(int irq, void *dev_id); - -/* Global Data - * Note: All data must be explicitly initialized!!! - */ - -/* private data */ -static const char cycx_drvname[] = "cyclomx"; -static const char cycx_fullname[] = "CYCLOM 2X(tm) Sync Card Driver"; -static const char cycx_copyright[] = "(c) 1998-2003 Arnaldo Carvalho de Melo " - ""; -static int cycx_ncards = CONFIG_CYCX_CARDS; -static struct cycx_device *cycx_card_array; /* adapter data space */ - -/* Kernel Loadable Module Entry Points */ - -/* - * Module 'insert' entry point. - * o print announcement - * o allocate adapter data space - * o initialize static data - * o register all cards with WAN router - * o calibrate Cyclom 2X shared memory access delay. - * - * Return: 0 Ok - * < 0 error. - * Context: process - */ -static int __init cycx_init(void) -{ - int cnt, err = -ENOMEM; - - pr_info("%s v%u.%u %s\n", - cycx_fullname, CYCX_DRV_VERSION, CYCX_DRV_RELEASE, - cycx_copyright); - - /* Verify number of cards and allocate adapter data space */ - cycx_ncards = min_t(int, cycx_ncards, CYCX_MAX_CARDS); - cycx_ncards = max_t(int, cycx_ncards, 1); - cycx_card_array = kcalloc(cycx_ncards, sizeof(struct cycx_device), GFP_KERNEL); - if (!cycx_card_array) - goto out; - - - /* Register adapters with WAN router */ - for (cnt = 0; cnt < cycx_ncards; ++cnt) { - struct cycx_device *card = &cycx_card_array[cnt]; - struct wan_device *wandev = &card->wandev; - - sprintf(card->devname, "%s%d", cycx_drvname, cnt + 1); - wandev->magic = ROUTER_MAGIC; - wandev->name = card->devname; - wandev->private = card; - wandev->setup = cycx_wan_setup; - wandev->shutdown = cycx_wan_shutdown; - err = register_wan_device(wandev); - - if (err) { - pr_err("%s registration failed with error %d!\n", - card->devname, err); - break; - } - } - - err = -ENODEV; - if (!cnt) { - kfree(cycx_card_array); - goto out; - } - err = 0; - cycx_ncards = cnt; /* adjust actual number of cards */ -out: return err; -} - -/* - * Module 'remove' entry point. - * o unregister all adapters from the WAN router - * o release all remaining system resources - */ -static void __exit cycx_exit(void) -{ - int i = 0; - - for (; i < cycx_ncards; ++i) { - struct cycx_device *card = &cycx_card_array[i]; - unregister_wan_device(card->devname); - } - - kfree(cycx_card_array); -} - -/* WAN Device Driver Entry Points */ -/* - * Setup/configure WAN link driver. - * o check adapter state - * o make sure firmware is present in configuration - * o allocate interrupt vector - * o setup Cyclom 2X hardware - * o call appropriate routine to perform protocol-specific initialization - * - * This function is called when router handles ROUTER_SETUP IOCTL. The - * configuration structure is in kernel memory (including extended data, if - * any). - */ -static int cycx_wan_setup(struct wan_device *wandev, wandev_conf_t *conf) -{ - int rc = -EFAULT; - struct cycx_device *card; - int irq; - - /* Sanity checks */ - - if (!wandev || !wandev->private || !conf) - goto out; - - card = wandev->private; - rc = -EBUSY; - if (wandev->state != WAN_UNCONFIGURED) - goto out; - - rc = -EINVAL; - if (!conf->data_size || !conf->data) { - pr_err("%s: firmware not found in configuration data!\n", - wandev->name); - goto out; - } - - if (conf->irq <= 0) { - pr_err("%s: can't configure without IRQ!\n", wandev->name); - goto out; - } - - /* Allocate IRQ */ - irq = conf->irq == 2 ? 9 : conf->irq; /* IRQ2 -> IRQ9 */ - - if (request_irq(irq, cycx_isr, 0, wandev->name, card)) { - pr_err("%s: can't reserve IRQ %d!\n", wandev->name, irq); - goto out; - } - - /* Configure hardware, load firmware, etc. */ - memset(&card->hw, 0, sizeof(card->hw)); - card->hw.irq = irq; - card->hw.dpmsize = CYCX_WINDOWSIZE; - card->hw.fwid = CFID_X25_2X; - spin_lock_init(&card->lock); - init_waitqueue_head(&card->wait_stats); - - rc = cycx_setup(&card->hw, conf->data, conf->data_size, conf->maddr); - if (rc) - goto out_irq; - - /* Initialize WAN device data space */ - wandev->irq = irq; - wandev->dma = wandev->ioport = 0; - wandev->maddr = (unsigned long)card->hw.dpmbase; - wandev->msize = card->hw.dpmsize; - wandev->hw_opt[2] = 0; - wandev->hw_opt[3] = card->hw.fwid; - - /* Protocol-specific initialization */ - switch (card->hw.fwid) { -#ifdef CONFIG_CYCLOMX_X25 - case CFID_X25_2X: - rc = cycx_x25_wan_init(card, conf); - break; -#endif - default: - pr_err("%s: this firmware is not supported!\n", wandev->name); - rc = -EINVAL; - } - - if (rc) { - cycx_down(&card->hw); - goto out_irq; - } - - rc = 0; -out: - return rc; -out_irq: - free_irq(irq, card); - goto out; -} - -/* - * Shut down WAN link driver. - * o shut down adapter hardware - * o release system resources. - * - * This function is called by the router when device is being unregistered or - * when it handles ROUTER_DOWN IOCTL. - */ -static int cycx_wan_shutdown(struct wan_device *wandev) -{ - int ret = -EFAULT; - struct cycx_device *card; - - /* sanity checks */ - if (!wandev || !wandev->private) - goto out; - - ret = 0; - if (wandev->state == WAN_UNCONFIGURED) - goto out; - - card = wandev->private; - wandev->state = WAN_UNCONFIGURED; - cycx_down(&card->hw); - pr_info("%s: irq %d being freed!\n", wandev->name, wandev->irq); - free_irq(wandev->irq, card); -out: return ret; -} - -/* Miscellaneous */ -/* - * Cyclom 2X Interrupt Service Routine. - * o acknowledge Cyclom 2X hardware interrupt. - * o call protocol-specific interrupt service routine, if any. - */ -static irqreturn_t cycx_isr(int irq, void *dev_id) -{ - struct cycx_device *card = dev_id; - - if (card->wandev.state == WAN_UNCONFIGURED) - goto out; - - if (card->in_isr) { - pr_warn("%s: interrupt re-entrancy on IRQ %d!\n", - card->devname, card->wandev.irq); - goto out; - } - - if (card->isr) - card->isr(card); - return IRQ_HANDLED; -out: - return IRQ_NONE; -} - -/* Set WAN device state. */ -void cycx_set_state(struct cycx_device *card, int state) -{ - unsigned long flags; - char *string_state = NULL; - - spin_lock_irqsave(&card->lock, flags); - - if (card->wandev.state != state) { - switch (state) { - case WAN_CONNECTED: - string_state = "connected!"; - break; - case WAN_DISCONNECTED: - string_state = "disconnected!"; - break; - } - pr_info("%s: link %s\n", card->devname, string_state); - card->wandev.state = state; - } - - card->state_tick = jiffies; - spin_unlock_irqrestore(&card->lock, flags); -} - -module_init(cycx_init); -module_exit(cycx_exit); diff --git a/drivers/net/wan/cycx_x25.c b/drivers/net/wan/cycx_x25.c deleted file mode 100644 index 06f3f6309e4b..000000000000 --- a/drivers/net/wan/cycx_x25.c +++ /dev/null @@ -1,1602 +0,0 @@ -/* -* cycx_x25.c Cyclom 2X WAN Link Driver. X.25 module. -* -* Author: Arnaldo Carvalho de Melo -* -* Copyright: (c) 1998-2003 Arnaldo Carvalho de Melo -* -* Based on sdla_x25.c by Gene Kozin -* -* This program is free software; you can redistribute it and/or -* modify it under the terms of the GNU General Public License -* as published by the Free Software Foundation; either version -* 2 of the License, or (at your option) any later version. -* ============================================================================ -* 2001/01/12 acme use dev_kfree_skb_irq on interrupt context -* 2000/04/02 acme dprintk, cycx_debug -* fixed the bug introduced in get_dev_by_lcn and -* get_dev_by_dte_addr by the anonymous hacker -* that converted this driver to softnet -* 2000/01/08 acme cleanup -* 1999/10/27 acme use ARPHRD_HWX25 so that the X.25 stack know -* that we have a X.25 stack implemented in -* firmware onboard -* 1999/10/18 acme support for X.25 sockets in if_send, -* beware: socket(AF_X25...) IS WORK IN PROGRESS, -* TCP/IP over X.25 via wanrouter not affected, -* working. -* 1999/10/09 acme chan_disc renamed to chan_disconnect, -* began adding support for X.25 sockets: -* conf->protocol in new_if -* 1999/10/05 acme fixed return E... to return -E... -* 1999/08/10 acme serialized access to the card thru a spinlock -* in x25_exec -* 1999/08/09 acme removed per channel spinlocks -* removed references to enable_tx_int -* 1999/05/28 acme fixed nibble_to_byte, ackvc now properly treated -* if_send simplified -* 1999/05/25 acme fixed t1, t2, t21 & t23 configuration -* use spinlocks instead of cli/sti in some points -* 1999/05/24 acme finished the x25_get_stat function -* 1999/05/23 acme dev->type = ARPHRD_X25 (tcpdump only works, -* AFAIT, with ARPHRD_ETHER). This seems to be -* needed to use socket(AF_X25)... -* Now the config file must specify a peer media -* address for svc channels over a crossover cable. -* Removed hold_timeout from x25_channel_t, -* not used. -* A little enhancement in the DEBUG processing -* 1999/05/22 acme go to DISCONNECTED in disconnect_confirm_intr, -* instead of chan_disc. -* 1999/05/16 marcelo fixed timer initialization in SVCs -* 1999/01/05 acme x25_configure now get (most of) all -* parameters... -* 1999/01/05 acme pktlen now (correctly) uses log2 (value -* configured) -* 1999/01/03 acme judicious use of data types (u8, u16, u32, etc) -* 1999/01/03 acme cyx_isr: reset dpmbase to acknowledge -* indication (interrupt from cyclom 2x) -* 1999/01/02 acme cyx_isr: first hackings... -* 1999/01/0203 acme when initializing an array don't give less -* elements than declared... -* example: char send_cmd[6] = "?\xFF\x10"; -* you'll gonna lose a couple hours, 'cause your -* brain won't admit that there's an error in the -* above declaration... the side effect is that -* memset is put into the unresolved symbols -* instead of using the inline memset functions... -* 1999/01/02 acme began chan_connect, chan_send, x25_send -* 1998/12/31 acme x25_configure -* this code can be compiled as non module -* 1998/12/27 acme code cleanup -* IPX code wiped out! let's decrease code -* complexity for now, remember: I'm learning! :) -* bps_to_speed_code OK -* 1998/12/26 acme Minimal debug code cleanup -* 1998/08/08 acme Initial version. -*/ - -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - -#define CYCLOMX_X25_DEBUG 1 - -#include /* isdigit() */ -#include /* return codes */ -#include /* ARPHRD_HWX25 */ -#include /* printk(), and other useful stuff */ -#include -#include /* inline memset(), etc. */ -#include -#include /* kmalloc(), kfree() */ -#include /* offsetof(), etc. */ -#include /* WAN router definitions */ - -#include /* htons(), etc. */ - -#include /* Cyclom 2X common user API definitions */ -#include /* X.25 firmware API definitions */ - -#include - -/* Defines & Macros */ -#define CYCX_X25_MAX_CMD_RETRY 5 -#define CYCX_X25_CHAN_MTU 2048 /* unfragmented logical channel MTU */ - -/* Data Structures */ -/* This is an extension of the 'struct net_device' we create for each network - interface to keep the rest of X.25 channel-specific data. */ -struct cycx_x25_channel { - /* This member must be first. */ - struct net_device *slave; /* WAN slave */ - - char name[WAN_IFNAME_SZ+1]; /* interface name, ASCIIZ */ - char addr[WAN_ADDRESS_SZ+1]; /* media address, ASCIIZ */ - char *local_addr; /* local media address, ASCIIZ - - svc thru crossover cable */ - s16 lcn; /* logical channel number/conn.req.key*/ - u8 link; - struct timer_list timer; /* timer used for svc channel disc. */ - u16 protocol; /* ethertype, 0 - multiplexed */ - u8 svc; /* 0 - permanent, 1 - switched */ - u8 state; /* channel state */ - u8 drop_sequence; /* mark sequence for dropping */ - u32 idle_tmout; /* sec, before disconnecting */ - struct sk_buff *rx_skb; /* receive socket buffer */ - struct cycx_device *card; /* -> owner */ - struct net_device_stats ifstats;/* interface statistics */ -}; - -/* Function Prototypes */ -/* WAN link driver entry points. These are called by the WAN router module. */ -static int cycx_wan_update(struct wan_device *wandev), - cycx_wan_new_if(struct wan_device *wandev, struct net_device *dev, - wanif_conf_t *conf), - cycx_wan_del_if(struct wan_device *wandev, struct net_device *dev); - -/* Network device interface */ -static int cycx_netdevice_init(struct net_device *dev); -static int cycx_netdevice_open(struct net_device *dev); -static int cycx_netdevice_stop(struct net_device *dev); -static int cycx_netdevice_hard_header(struct sk_buff *skb, - struct net_device *dev, u16 type, - const void *daddr, const void *saddr, - unsigned len); -static int cycx_netdevice_rebuild_header(struct sk_buff *skb); -static netdev_tx_t cycx_netdevice_hard_start_xmit(struct sk_buff *skb, - struct net_device *dev); - -static struct net_device_stats * - cycx_netdevice_get_stats(struct net_device *dev); - -/* Interrupt handlers */ -static void cycx_x25_irq_handler(struct cycx_device *card), - cycx_x25_irq_tx(struct cycx_device *card, struct cycx_x25_cmd *cmd), - cycx_x25_irq_rx(struct cycx_device *card, struct cycx_x25_cmd *cmd), - cycx_x25_irq_log(struct cycx_device *card, - struct cycx_x25_cmd *cmd), - cycx_x25_irq_stat(struct cycx_device *card, - struct cycx_x25_cmd *cmd), - cycx_x25_irq_connect_confirm(struct cycx_device *card, - struct cycx_x25_cmd *cmd), - cycx_x25_irq_disconnect_confirm(struct cycx_device *card, - struct cycx_x25_cmd *cmd), - cycx_x25_irq_connect(struct cycx_device *card, - struct cycx_x25_cmd *cmd), - cycx_x25_irq_disconnect(struct cycx_device *card, - struct cycx_x25_cmd *cmd), - cycx_x25_irq_spurious(struct cycx_device *card, - struct cycx_x25_cmd *cmd); - -/* X.25 firmware interface functions */ -static int cycx_x25_configure(struct cycx_device *card, - struct cycx_x25_config *conf), - cycx_x25_get_stats(struct cycx_device *card), - cycx_x25_send(struct cycx_device *card, u8 link, u8 lcn, u8 bitm, - int len, void *buf), - cycx_x25_connect_response(struct cycx_device *card, - struct cycx_x25_channel *chan), - cycx_x25_disconnect_response(struct cycx_device *card, u8 link, - u8 lcn); - -/* channel functions */ -static int cycx_x25_chan_connect(struct net_device *dev), - cycx_x25_chan_send(struct net_device *dev, struct sk_buff *skb); - -static void cycx_x25_chan_disconnect(struct net_device *dev), - cycx_x25_chan_send_event(struct net_device *dev, u8 event); - -/* Miscellaneous functions */ -static void cycx_x25_set_chan_state(struct net_device *dev, u8 state), - cycx_x25_chan_timer(unsigned long d); - -static void nibble_to_byte(u8 *s, u8 *d, u8 len, u8 nibble), - reset_timer(struct net_device *dev); - -static u8 bps_to_speed_code(u32 bps); -static u8 cycx_log2(u32 n); - -static unsigned dec_to_uint(u8 *str, int len); - -static struct net_device *cycx_x25_get_dev_by_lcn(struct wan_device *wandev, - s16 lcn); -static struct net_device * - cycx_x25_get_dev_by_dte_addr(struct wan_device *wandev, char *dte); - -static void cycx_x25_chan_setup(struct net_device *dev); - -#ifdef CYCLOMX_X25_DEBUG -static void hex_dump(char *msg, unsigned char *p, int len); -static void cycx_x25_dump_config(struct cycx_x25_config *conf); -static void cycx_x25_dump_stats(struct cycx_x25_stats *stats); -static void cycx_x25_dump_devs(struct wan_device *wandev); -#else -#define hex_dump(msg, p, len) -#define cycx_x25_dump_config(conf) -#define cycx_x25_dump_stats(stats) -#define cycx_x25_dump_devs(wandev) -#endif -/* Public Functions */ - -/* X.25 Protocol Initialization routine. - * - * This routine is called by the main Cyclom 2X module during setup. At this - * point adapter is completely initialized and X.25 firmware is running. - * o configure adapter - * o initialize protocol-specific fields of the adapter data space. - * - * Return: 0 o.k. - * < 0 failure. */ -int cycx_x25_wan_init(struct cycx_device *card, wandev_conf_t *conf) -{ - struct cycx_x25_config cfg; - - /* Verify configuration ID */ - if (conf->config_id != WANCONFIG_X25) { - pr_info("%s: invalid configuration ID %u!\n", - card->devname, conf->config_id); - return -EINVAL; - } - - /* Initialize protocol-specific fields */ - card->mbox = card->hw.dpmbase + X25_MBOX_OFFS; - card->u.x.connection_keys = 0; - spin_lock_init(&card->u.x.lock); - - /* Configure adapter. Here we set reasonable defaults, then parse - * device configuration structure and set configuration options. - * Most configuration options are verified and corrected (if - * necessary) since we can't rely on the adapter to do so and don't - * want it to fail either. */ - memset(&cfg, 0, sizeof(cfg)); - cfg.link = 0; - cfg.clock = conf->clocking == WANOPT_EXTERNAL ? 8 : 55; - cfg.speed = bps_to_speed_code(conf->bps); - cfg.n3win = 7; - cfg.n2win = 2; - cfg.n2 = 5; - cfg.nvc = 1; - cfg.npvc = 1; - cfg.flags = 0x02; /* default = V35 */ - cfg.t1 = 10; /* line carrier timeout */ - cfg.t2 = 29; /* tx timeout */ - cfg.t21 = 180; /* CALL timeout */ - cfg.t23 = 180; /* CLEAR timeout */ - - /* adjust MTU */ - if (!conf->mtu || conf->mtu >= 512) - card->wandev.mtu = 512; - else if (conf->mtu >= 256) - card->wandev.mtu = 256; - else if (conf->mtu >= 128) - card->wandev.mtu = 128; - else - card->wandev.mtu = 64; - - cfg.pktlen = cycx_log2(card->wandev.mtu); - - if (conf->station == WANOPT_DTE) { - cfg.locaddr = 3; /* DTE */ - cfg.remaddr = 1; /* DCE */ - } else { - cfg.locaddr = 1; /* DCE */ - cfg.remaddr = 3; /* DTE */ - } - - if (conf->interface == WANOPT_RS232) - cfg.flags = 0; /* FIXME just reset the 2nd bit */ - - if (conf->u.x25.hi_pvc) { - card->u.x.hi_pvc = min_t(unsigned int, conf->u.x25.hi_pvc, 4095); - card->u.x.lo_pvc = min_t(unsigned int, conf->u.x25.lo_pvc, card->u.x.hi_pvc); - } - - if (conf->u.x25.hi_svc) { - card->u.x.hi_svc = min_t(unsigned int, conf->u.x25.hi_svc, 4095); - card->u.x.lo_svc = min_t(unsigned int, conf->u.x25.lo_svc, card->u.x.hi_svc); - } - - if (card->u.x.lo_pvc == 255) - cfg.npvc = 0; - else - cfg.npvc = card->u.x.hi_pvc - card->u.x.lo_pvc + 1; - - cfg.nvc = card->u.x.hi_svc - card->u.x.lo_svc + 1 + cfg.npvc; - - if (conf->u.x25.hdlc_window) - cfg.n2win = min_t(unsigned int, conf->u.x25.hdlc_window, 7); - - if (conf->u.x25.pkt_window) - cfg.n3win = min_t(unsigned int, conf->u.x25.pkt_window, 7); - - if (conf->u.x25.t1) - cfg.t1 = min_t(unsigned int, conf->u.x25.t1, 30); - - if (conf->u.x25.t2) - cfg.t2 = min_t(unsigned int, conf->u.x25.t2, 30); - - if (conf->u.x25.t11_t21) - cfg.t21 = min_t(unsigned int, conf->u.x25.t11_t21, 30); - - if (conf->u.x25.t13_t23) - cfg.t23 = min_t(unsigned int, conf->u.x25.t13_t23, 30); - - if (conf->u.x25.n2) - cfg.n2 = min_t(unsigned int, conf->u.x25.n2, 30); - - /* initialize adapter */ - if (cycx_x25_configure(card, &cfg)) - return -EIO; - - /* Initialize protocol-specific fields of adapter data space */ - card->wandev.bps = conf->bps; - card->wandev.interface = conf->interface; - card->wandev.clocking = conf->clocking; - card->wandev.station = conf->station; - card->isr = cycx_x25_irq_handler; - card->exec = NULL; - card->wandev.update = cycx_wan_update; - card->wandev.new_if = cycx_wan_new_if; - card->wandev.del_if = cycx_wan_del_if; - card->wandev.state = WAN_DISCONNECTED; - - return 0; -} - -/* WAN Device Driver Entry Points */ -/* Update device status & statistics. */ -static int cycx_wan_update(struct wan_device *wandev) -{ - /* sanity checks */ - if (!wandev || !wandev->private) - return -EFAULT; - - if (wandev->state == WAN_UNCONFIGURED) - return -ENODEV; - - cycx_x25_get_stats(wandev->private); - - return 0; -} - -/* Create new logical channel. - * This routine is called by the router when ROUTER_IFNEW IOCTL is being - * handled. - * o parse media- and hardware-specific configuration - * o make sure that a new channel can be created - * o allocate resources, if necessary - * o prepare network device structure for registration. - * - * Return: 0 o.k. - * < 0 failure (channel will not be created) */ -static int cycx_wan_new_if(struct wan_device *wandev, struct net_device *dev, - wanif_conf_t *conf) -{ - struct cycx_device *card = wandev->private; - struct cycx_x25_channel *chan; - int err = 0; - - if (!conf->name[0] || strlen(conf->name) > WAN_IFNAME_SZ) { - pr_info("%s: invalid interface name!\n", card->devname); - return -EINVAL; - } - - dev = alloc_netdev(sizeof(struct cycx_x25_channel), conf->name, - cycx_x25_chan_setup); - if (!dev) - return -ENOMEM; - - chan = netdev_priv(dev); - strcpy(chan->name, conf->name); - chan->card = card; - chan->link = conf->port; - chan->protocol = conf->protocol ? ETH_P_X25 : ETH_P_IP; - chan->rx_skb = NULL; - /* only used in svc connected thru crossover cable */ - chan->local_addr = NULL; - - if (conf->addr[0] == '@') { /* SVC */ - int len = strlen(conf->local_addr); - - if (len) { - if (len > WAN_ADDRESS_SZ) { - pr_err("%s: %s local addr too long!\n", - wandev->name, chan->name); - err = -EINVAL; - goto error; - } else { - chan->local_addr = kmalloc(len + 1, GFP_KERNEL); - - if (!chan->local_addr) { - err = -ENOMEM; - goto error; - } - } - - strncpy(chan->local_addr, conf->local_addr, - WAN_ADDRESS_SZ); - } - - chan->svc = 1; - strncpy(chan->addr, &conf->addr[1], WAN_ADDRESS_SZ); - init_timer(&chan->timer); - chan->timer.function = cycx_x25_chan_timer; - chan->timer.data = (unsigned long)dev; - - /* Set channel timeouts (default if not specified) */ - chan->idle_tmout = conf->idle_timeout ? conf->idle_timeout : 90; - } else if (isdigit(conf->addr[0])) { /* PVC */ - s16 lcn = dec_to_uint(conf->addr, 0); - - if (lcn >= card->u.x.lo_pvc && lcn <= card->u.x.hi_pvc) - chan->lcn = lcn; - else { - pr_err("%s: PVC %u is out of range on interface %s!\n", - wandev->name, lcn, chan->name); - err = -EINVAL; - goto error; - } - } else { - pr_err("%s: invalid media address on interface %s!\n", - wandev->name, chan->name); - err = -EINVAL; - goto error; - } - - return 0; - -error: - free_netdev(dev); - return err; -} - -/* Delete logical channel. */ -static int cycx_wan_del_if(struct wan_device *wandev, struct net_device *dev) -{ - struct cycx_x25_channel *chan = netdev_priv(dev); - - if (chan->svc) { - kfree(chan->local_addr); - if (chan->state == WAN_CONNECTED) - del_timer(&chan->timer); - } - - return 0; -} - - -/* Network Device Interface */ - -static const struct header_ops cycx_header_ops = { - .create = cycx_netdevice_hard_header, - .rebuild = cycx_netdevice_rebuild_header, -}; - -static const struct net_device_ops cycx_netdev_ops = { - .ndo_init = cycx_netdevice_init, - .ndo_open = cycx_netdevice_open, - .ndo_stop = cycx_netdevice_stop, - .ndo_start_xmit = cycx_netdevice_hard_start_xmit, - .ndo_get_stats = cycx_netdevice_get_stats, -}; - -static void cycx_x25_chan_setup(struct net_device *dev) -{ - /* Initialize device driver entry points */ - dev->netdev_ops = &cycx_netdev_ops; - dev->header_ops = &cycx_header_ops; - - /* Initialize media-specific parameters */ - dev->mtu = CYCX_X25_CHAN_MTU; - dev->type = ARPHRD_HWX25; /* ARP h/w type */ - dev->hard_header_len = 0; /* media header length */ - dev->addr_len = 0; /* hardware address length */ -} - -/* Initialize Linux network interface. - * - * This routine is called only once for each interface, during Linux network - * interface registration. Returning anything but zero will fail interface - * registration. */ -static int cycx_netdevice_init(struct net_device *dev) -{ - struct cycx_x25_channel *chan = netdev_priv(dev); - struct cycx_device *card = chan->card; - struct wan_device *wandev = &card->wandev; - - if (!chan->svc) - *(__be16*)dev->dev_addr = htons(chan->lcn); - - /* Initialize hardware parameters (just for reference) */ - dev->irq = wandev->irq; - dev->dma = wandev->dma; - dev->base_addr = wandev->ioport; - dev->mem_start = (unsigned long)wandev->maddr; - dev->mem_end = (unsigned long)(wandev->maddr + - wandev->msize - 1); - dev->flags |= IFF_NOARP; - - /* Set transmit buffer queue length */ - dev->tx_queue_len = 10; - - /* Initialize socket buffers */ - cycx_x25_set_chan_state(dev, WAN_DISCONNECTED); - - return 0; -} - -/* Open network interface. - * o prevent module from unloading by incrementing use count - * o if link is disconnected then initiate connection - * - * Return 0 if O.k. or errno. */ -static int cycx_netdevice_open(struct net_device *dev) -{ - if (netif_running(dev)) - return -EBUSY; /* only one open is allowed */ - - netif_start_queue(dev); - return 0; -} - -/* Close network interface. - * o reset flags. - * o if there's no more open channels then disconnect physical link. */ -static int cycx_netdevice_stop(struct net_device *dev) -{ - struct cycx_x25_channel *chan = netdev_priv(dev); - - netif_stop_queue(dev); - - if (chan->state == WAN_CONNECTED || chan->state == WAN_CONNECTING) - cycx_x25_chan_disconnect(dev); - - return 0; -} - -/* Build media header. - * o encapsulate packet according to encapsulation type. - * - * The trick here is to put packet type (Ethertype) into 'protocol' field of - * the socket buffer, so that we don't forget it. If encapsulation fails, - * set skb->protocol to 0 and discard packet later. - * - * Return: media header length. */ -static int cycx_netdevice_hard_header(struct sk_buff *skb, - struct net_device *dev, u16 type, - const void *daddr, const void *saddr, - unsigned len) -{ - skb->protocol = htons(type); - - return dev->hard_header_len; -} - -/* * Re-build media header. - * Return: 1 physical address resolved. - * 0 physical address not resolved */ -static int cycx_netdevice_rebuild_header(struct sk_buff *skb) -{ - return 1; -} - -/* Send a packet on a network interface. - * o set busy flag (marks start of the transmission). - * o check link state. If link is not up, then drop the packet. - * o check channel status. If it's down then initiate a call. - * o pass a packet to corresponding WAN device. - * o free socket buffer - * - * Return: 0 complete (socket buffer must be freed) - * non-0 packet may be re-transmitted (tbusy must be set) - * - * Notes: - * 1. This routine is called either by the protocol stack or by the "net - * bottom half" (with interrupts enabled). - * 2. Setting tbusy flag will inhibit further transmit requests from the - * protocol stack and can be used for flow control with protocol layer. */ -static netdev_tx_t cycx_netdevice_hard_start_xmit(struct sk_buff *skb, - struct net_device *dev) -{ - struct cycx_x25_channel *chan = netdev_priv(dev); - struct cycx_device *card = chan->card; - - if (!chan->svc) - chan->protocol = ntohs(skb->protocol); - - if (card->wandev.state != WAN_CONNECTED) - ++chan->ifstats.tx_dropped; - else if (chan->svc && chan->protocol && - chan->protocol != ntohs(skb->protocol)) { - pr_info("%s: unsupported Ethertype 0x%04X on interface %s!\n", - card->devname, ntohs(skb->protocol), dev->name); - ++chan->ifstats.tx_errors; - } else if (chan->protocol == ETH_P_IP) { - switch (chan->state) { - case WAN_DISCONNECTED: - if (cycx_x25_chan_connect(dev)) { - netif_stop_queue(dev); - return NETDEV_TX_BUSY; - } - /* fall thru */ - case WAN_CONNECTED: - reset_timer(dev); - dev->trans_start = jiffies; - netif_stop_queue(dev); - - if (cycx_x25_chan_send(dev, skb)) - return NETDEV_TX_BUSY; - - break; - default: - ++chan->ifstats.tx_dropped; - ++card->wandev.stats.tx_dropped; - } - } else { /* chan->protocol == ETH_P_X25 */ - switch (skb->data[0]) { - case X25_IFACE_DATA: - break; - case X25_IFACE_CONNECT: - cycx_x25_chan_connect(dev); - goto free_packet; - case X25_IFACE_DISCONNECT: - cycx_x25_chan_disconnect(dev); - goto free_packet; - default: - pr_info("%s: unknown %d x25-iface request on %s!\n", - card->devname, skb->data[0], dev->name); - ++chan->ifstats.tx_errors; - goto free_packet; - } - - skb_pull(skb, 1); /* Remove control byte */ - reset_timer(dev); - dev->trans_start = jiffies; - netif_stop_queue(dev); - - if (cycx_x25_chan_send(dev, skb)) { - /* prepare for future retransmissions */ - skb_push(skb, 1); - return NETDEV_TX_BUSY; - } - } - -free_packet: - dev_kfree_skb(skb); - - return NETDEV_TX_OK; -} - -/* Get Ethernet-style interface statistics. - * Return a pointer to struct net_device_stats */ -static struct net_device_stats *cycx_netdevice_get_stats(struct net_device *dev) -{ - struct cycx_x25_channel *chan = netdev_priv(dev); - - return chan ? &chan->ifstats : NULL; -} - -/* Interrupt Handlers */ -/* X.25 Interrupt Service Routine. */ -static void cycx_x25_irq_handler(struct cycx_device *card) -{ - struct cycx_x25_cmd cmd; - u16 z = 0; - - card->in_isr = 1; - card->buff_int_mode_unbusy = 0; - cycx_peek(&card->hw, X25_RXMBOX_OFFS, &cmd, sizeof(cmd)); - - switch (cmd.command) { - case X25_DATA_INDICATION: - cycx_x25_irq_rx(card, &cmd); - break; - case X25_ACK_FROM_VC: - cycx_x25_irq_tx(card, &cmd); - break; - case X25_LOG: - cycx_x25_irq_log(card, &cmd); - break; - case X25_STATISTIC: - cycx_x25_irq_stat(card, &cmd); - break; - case X25_CONNECT_CONFIRM: - cycx_x25_irq_connect_confirm(card, &cmd); - break; - case X25_CONNECT_INDICATION: - cycx_x25_irq_connect(card, &cmd); - break; - case X25_DISCONNECT_INDICATION: - cycx_x25_irq_disconnect(card, &cmd); - break; - case X25_DISCONNECT_CONFIRM: - cycx_x25_irq_disconnect_confirm(card, &cmd); - break; - case X25_LINE_ON: - cycx_set_state(card, WAN_CONNECTED); - break; - case X25_LINE_OFF: - cycx_set_state(card, WAN_DISCONNECTED); - break; - default: - cycx_x25_irq_spurious(card, &cmd); - break; - } - - cycx_poke(&card->hw, 0, &z, sizeof(z)); - cycx_poke(&card->hw, X25_RXMBOX_OFFS, &z, sizeof(z)); - card->in_isr = 0; -} - -/* Transmit interrupt handler. - * o Release socket buffer - * o Clear 'tbusy' flag */ -static void cycx_x25_irq_tx(struct cycx_device *card, struct cycx_x25_cmd *cmd) -{ - struct net_device *dev; - struct wan_device *wandev = &card->wandev; - u8 lcn; - - cycx_peek(&card->hw, cmd->buf, &lcn, sizeof(lcn)); - - /* unbusy device and then dev_tint(); */ - dev = cycx_x25_get_dev_by_lcn(wandev, lcn); - if (dev) { - card->buff_int_mode_unbusy = 1; - netif_wake_queue(dev); - } else - pr_err("%s:ackvc for inexistent lcn %d\n", card->devname, lcn); -} - -/* Receive interrupt handler. - * This routine handles fragmented IP packets using M-bit according to the - * RFC1356. - * o map logical channel number to network interface. - * o allocate socket buffer or append received packet to the existing one. - * o if M-bit is reset (i.e. it's the last packet in a sequence) then - * decapsulate packet and pass socket buffer to the protocol stack. - * - * Notes: - * 1. When allocating a socket buffer, if M-bit is set then more data is - * coming and we have to allocate buffer for the maximum IP packet size - * expected on this channel. - * 2. If something goes wrong and X.25 packet has to be dropped (e.g. no - * socket buffers available) the whole packet sequence must be discarded. */ -static void cycx_x25_irq_rx(struct cycx_device *card, struct cycx_x25_cmd *cmd) -{ - struct wan_device *wandev = &card->wandev; - struct net_device *dev; - struct cycx_x25_channel *chan; - struct sk_buff *skb; - u8 bitm, lcn; - int pktlen = cmd->len - 5; - - cycx_peek(&card->hw, cmd->buf, &lcn, sizeof(lcn)); - cycx_peek(&card->hw, cmd->buf + 4, &bitm, sizeof(bitm)); - bitm &= 0x10; - - dev = cycx_x25_get_dev_by_lcn(wandev, lcn); - if (!dev) { - /* Invalid channel, discard packet */ - pr_info("%s: receiving on orphaned LCN %d!\n", - card->devname, lcn); - return; - } - - chan = netdev_priv(dev); - reset_timer(dev); - - if (chan->drop_sequence) { - if (!bitm) - chan->drop_sequence = 0; - else - return; - } - - if ((skb = chan->rx_skb) == NULL) { - /* Allocate new socket buffer */ - int bufsize = bitm ? dev->mtu : pktlen; - - if ((skb = dev_alloc_skb((chan->protocol == ETH_P_X25 ? 1 : 0) + - bufsize + - dev->hard_header_len)) == NULL) { - pr_info("%s: no socket buffers available!\n", - card->devname); - chan->drop_sequence = 1; - ++chan->ifstats.rx_dropped; - return; - } - - if (chan->protocol == ETH_P_X25) /* X.25 socket layer control */ - /* 0 = data packet (dev_alloc_skb zeroed skb->data) */ - skb_put(skb, 1); - - skb->dev = dev; - skb->protocol = htons(chan->protocol); - chan->rx_skb = skb; - } - - if (skb_tailroom(skb) < pktlen) { - /* No room for the packet. Call off the whole thing! */ - dev_kfree_skb_irq(skb); - chan->rx_skb = NULL; - - if (bitm) - chan->drop_sequence = 1; - - pr_info("%s: unexpectedly long packet sequence on interface %s!\n", - card->devname, dev->name); - ++chan->ifstats.rx_length_errors; - return; - } - - /* Append packet to the socket buffer */ - cycx_peek(&card->hw, cmd->buf + 5, skb_put(skb, pktlen), pktlen); - - if (bitm) - return; /* more data is coming */ - - chan->rx_skb = NULL; /* dequeue packet */ - - ++chan->ifstats.rx_packets; - chan->ifstats.rx_bytes += pktlen; - - skb_reset_mac_header(skb); - netif_rx(skb); -} - -/* Connect interrupt handler. */ -static void cycx_x25_irq_connect(struct cycx_device *card, - struct cycx_x25_cmd *cmd) -{ - struct wan_device *wandev = &card->wandev; - struct net_device *dev = NULL; - struct cycx_x25_channel *chan; - u8 d[32], - loc[24], - rem[24]; - u8 lcn, sizeloc, sizerem; - - cycx_peek(&card->hw, cmd->buf, &lcn, sizeof(lcn)); - cycx_peek(&card->hw, cmd->buf + 5, &sizeloc, sizeof(sizeloc)); - cycx_peek(&card->hw, cmd->buf + 6, d, cmd->len - 6); - - sizerem = sizeloc >> 4; - sizeloc &= 0x0F; - - loc[0] = rem[0] = '\0'; - - if (sizeloc) - nibble_to_byte(d, loc, sizeloc, 0); - - if (sizerem) - nibble_to_byte(d + (sizeloc >> 1), rem, sizerem, sizeloc & 1); - - dprintk(1, KERN_INFO "%s:lcn=%d, local=%s, remote=%s\n", - __func__, lcn, loc, rem); - - dev = cycx_x25_get_dev_by_dte_addr(wandev, rem); - if (!dev) { - /* Invalid channel, discard packet */ - pr_info("%s: connect not expected: remote %s!\n", - card->devname, rem); - return; - } - - chan = netdev_priv(dev); - chan->lcn = lcn; - cycx_x25_connect_response(card, chan); - cycx_x25_set_chan_state(dev, WAN_CONNECTED); -} - -/* Connect confirm interrupt handler. */ -static void cycx_x25_irq_connect_confirm(struct cycx_device *card, - struct cycx_x25_cmd *cmd) -{ - struct wan_device *wandev = &card->wandev; - struct net_device *dev; - struct cycx_x25_channel *chan; - u8 lcn, key; - - cycx_peek(&card->hw, cmd->buf, &lcn, sizeof(lcn)); - cycx_peek(&card->hw, cmd->buf + 1, &key, sizeof(key)); - dprintk(1, KERN_INFO "%s: %s:lcn=%d, key=%d\n", - card->devname, __func__, lcn, key); - - dev = cycx_x25_get_dev_by_lcn(wandev, -key); - if (!dev) { - /* Invalid channel, discard packet */ - clear_bit(--key, (void*)&card->u.x.connection_keys); - pr_info("%s: connect confirm not expected: lcn %d, key=%d!\n", - card->devname, lcn, key); - return; - } - - clear_bit(--key, (void*)&card->u.x.connection_keys); - chan = netdev_priv(dev); - chan->lcn = lcn; - cycx_x25_set_chan_state(dev, WAN_CONNECTED); -} - -/* Disconnect confirm interrupt handler. */ -static void cycx_x25_irq_disconnect_confirm(struct cycx_device *card, - struct cycx_x25_cmd *cmd) -{ - struct wan_device *wandev = &card->wandev; - struct net_device *dev; - u8 lcn; - - cycx_peek(&card->hw, cmd->buf, &lcn, sizeof(lcn)); - dprintk(1, KERN_INFO "%s: %s:lcn=%d\n", - card->devname, __func__, lcn); - dev = cycx_x25_get_dev_by_lcn(wandev, lcn); - if (!dev) { - /* Invalid channel, discard packet */ - pr_info("%s:disconnect confirm not expected!:lcn %d\n", - card->devname, lcn); - return; - } - - cycx_x25_set_chan_state(dev, WAN_DISCONNECTED); -} - -/* disconnect interrupt handler. */ -static void cycx_x25_irq_disconnect(struct cycx_device *card, - struct cycx_x25_cmd *cmd) -{ - struct wan_device *wandev = &card->wandev; - struct net_device *dev; - u8 lcn; - - cycx_peek(&card->hw, cmd->buf, &lcn, sizeof(lcn)); - dprintk(1, KERN_INFO "%s:lcn=%d\n", __func__, lcn); - - dev = cycx_x25_get_dev_by_lcn(wandev, lcn); - if (dev) { - struct cycx_x25_channel *chan = netdev_priv(dev); - - cycx_x25_disconnect_response(card, chan->link, lcn); - cycx_x25_set_chan_state(dev, WAN_DISCONNECTED); - } else - cycx_x25_disconnect_response(card, 0, lcn); -} - -/* LOG interrupt handler. */ -static void cycx_x25_irq_log(struct cycx_device *card, struct cycx_x25_cmd *cmd) -{ -#if CYCLOMX_X25_DEBUG - char bf[20]; - u16 size, toread, link, msg_code; - u8 code, routine; - - cycx_peek(&card->hw, cmd->buf, &msg_code, sizeof(msg_code)); - cycx_peek(&card->hw, cmd->buf + 2, &link, sizeof(link)); - cycx_peek(&card->hw, cmd->buf + 4, &size, sizeof(size)); - /* at most 20 bytes are available... thanks to Daniela :) */ - toread = size < 20 ? size : 20; - cycx_peek(&card->hw, cmd->buf + 10, &bf, toread); - cycx_peek(&card->hw, cmd->buf + 10 + toread, &code, 1); - cycx_peek(&card->hw, cmd->buf + 10 + toread + 1, &routine, 1); - - pr_info("cycx_x25_irq_handler: X25_LOG (0x4500) indic.:\n"); - pr_info("cmd->buf=0x%X\n", cmd->buf); - pr_info("Log message code=0x%X\n", msg_code); - pr_info("Link=%d\n", link); - pr_info("log code=0x%X\n", code); - pr_info("log routine=0x%X\n", routine); - pr_info("Message size=%d\n", size); - hex_dump("Message", bf, toread); -#endif -} - -/* STATISTIC interrupt handler. */ -static void cycx_x25_irq_stat(struct cycx_device *card, - struct cycx_x25_cmd *cmd) -{ - cycx_peek(&card->hw, cmd->buf, &card->u.x.stats, - sizeof(card->u.x.stats)); - hex_dump("cycx_x25_irq_stat", (unsigned char*)&card->u.x.stats, - sizeof(card->u.x.stats)); - cycx_x25_dump_stats(&card->u.x.stats); - wake_up_interruptible(&card->wait_stats); -} - -/* Spurious interrupt handler. - * o print a warning - * If number of spurious interrupts exceeded some limit, then ??? */ -static void cycx_x25_irq_spurious(struct cycx_device *card, - struct cycx_x25_cmd *cmd) -{ - pr_info("%s: spurious interrupt (0x%X)!\n", - card->devname, cmd->command); -} -#ifdef CYCLOMX_X25_DEBUG -static void hex_dump(char *msg, unsigned char *p, int len) -{ - print_hex_dump(KERN_INFO, msg, DUMP_PREFIX_OFFSET, 16, 1, - p, len, true); -} -#endif - -/* Cyclom 2X Firmware-Specific Functions */ -/* Exec X.25 command. */ -static int x25_exec(struct cycx_device *card, int command, int link, - void *d1, int len1, void *d2, int len2) -{ - struct cycx_x25_cmd c; - unsigned long flags; - u32 addr = 0x1200 + 0x2E0 * link + 0x1E2; - u8 retry = CYCX_X25_MAX_CMD_RETRY; - int err = 0; - - c.command = command; - c.link = link; - c.len = len1 + len2; - - spin_lock_irqsave(&card->u.x.lock, flags); - - /* write command */ - cycx_poke(&card->hw, X25_MBOX_OFFS, &c, sizeof(c) - sizeof(c.buf)); - - /* write X.25 data */ - if (d1) { - cycx_poke(&card->hw, addr, d1, len1); - - if (d2) { - if (len2 > 254) { - u32 addr1 = 0xA00 + 0x400 * link; - - cycx_poke(&card->hw, addr + len1, d2, 249); - cycx_poke(&card->hw, addr1, ((u8*)d2) + 249, - len2 - 249); - } else - cycx_poke(&card->hw, addr + len1, d2, len2); - } - } - - /* generate interruption, executing command */ - cycx_intr(&card->hw); - - /* wait till card->mbox == 0 */ - do { - err = cycx_exec(card->mbox); - } while (retry-- && err); - - spin_unlock_irqrestore(&card->u.x.lock, flags); - - return err; -} - -/* Configure adapter. */ -static int cycx_x25_configure(struct cycx_device *card, - struct cycx_x25_config *conf) -{ - struct { - u16 nlinks; - struct cycx_x25_config conf[2]; - } x25_cmd_conf; - - memset(&x25_cmd_conf, 0, sizeof(x25_cmd_conf)); - x25_cmd_conf.nlinks = 2; - x25_cmd_conf.conf[0] = *conf; - /* FIXME: we need to find a way in the wanrouter framework - to configure the second link, for now lets use it - with the same config from the first link, fixing - the interface type to RS232, the speed in 38400 and - the clock to external */ - x25_cmd_conf.conf[1] = *conf; - x25_cmd_conf.conf[1].link = 1; - x25_cmd_conf.conf[1].speed = 5; /* 38400 */ - x25_cmd_conf.conf[1].clock = 8; - x25_cmd_conf.conf[1].flags = 0; /* default = RS232 */ - - cycx_x25_dump_config(&x25_cmd_conf.conf[0]); - cycx_x25_dump_config(&x25_cmd_conf.conf[1]); - - return x25_exec(card, X25_CONFIG, 0, - &x25_cmd_conf, sizeof(x25_cmd_conf), NULL, 0); -} - -/* Get protocol statistics. */ -static int cycx_x25_get_stats(struct cycx_device *card) -{ - /* the firmware expects 20 in the size field!!! - thanks to Daniela */ - int err = x25_exec(card, X25_STATISTIC, 0, NULL, 20, NULL, 0); - - if (err) - return err; - - interruptible_sleep_on(&card->wait_stats); - - if (signal_pending(current)) - return -EINTR; - - card->wandev.stats.rx_packets = card->u.x.stats.n2_rx_frames; - card->wandev.stats.rx_over_errors = card->u.x.stats.rx_over_errors; - card->wandev.stats.rx_crc_errors = card->u.x.stats.rx_crc_errors; - card->wandev.stats.rx_length_errors = 0; /* not available from fw */ - card->wandev.stats.rx_frame_errors = 0; /* not available from fw */ - card->wandev.stats.rx_missed_errors = card->u.x.stats.rx_aborts; - card->wandev.stats.rx_dropped = 0; /* not available from fw */ - card->wandev.stats.rx_errors = 0; /* not available from fw */ - card->wandev.stats.tx_packets = card->u.x.stats.n2_tx_frames; - card->wandev.stats.tx_aborted_errors = card->u.x.stats.tx_aborts; - card->wandev.stats.tx_dropped = 0; /* not available from fw */ - card->wandev.stats.collisions = 0; /* not available from fw */ - card->wandev.stats.tx_errors = 0; /* not available from fw */ - - cycx_x25_dump_devs(&card->wandev); - - return 0; -} - -/* return the number of nibbles */ -static int byte_to_nibble(u8 *s, u8 *d, char *nibble) -{ - int i = 0; - - if (*nibble && *s) { - d[i] |= *s++ - '0'; - *nibble = 0; - ++i; - } - - while (*s) { - d[i] = (*s - '0') << 4; - if (*(s + 1)) - d[i] |= *(s + 1) - '0'; - else { - *nibble = 1; - break; - } - ++i; - s += 2; - } - - return i; -} - -static void nibble_to_byte(u8 *s, u8 *d, u8 len, u8 nibble) -{ - if (nibble) { - *d++ = '0' + (*s++ & 0x0F); - --len; - } - - while (len) { - *d++ = '0' + (*s >> 4); - - if (--len) { - *d++ = '0' + (*s & 0x0F); - --len; - } else break; - - ++s; - } - - *d = '\0'; -} - -/* Place X.25 call. */ -static int x25_place_call(struct cycx_device *card, - struct cycx_x25_channel *chan) -{ - int err = 0, - len; - char d[64], - nibble = 0, - mylen = chan->local_addr ? strlen(chan->local_addr) : 0, - remotelen = strlen(chan->addr); - u8 key; - - if (card->u.x.connection_keys == ~0U) { - pr_info("%s: too many simultaneous connection requests!\n", - card->devname); - return -EAGAIN; - } - - key = ffz(card->u.x.connection_keys); - set_bit(key, (void*)&card->u.x.connection_keys); - ++key; - dprintk(1, KERN_INFO "%s:x25_place_call:key=%d\n", card->devname, key); - memset(d, 0, sizeof(d)); - d[1] = key; /* user key */ - d[2] = 0x10; - d[4] = 0x0B; - - len = byte_to_nibble(chan->addr, d + 6, &nibble); - - if (chan->local_addr) - len += byte_to_nibble(chan->local_addr, d + 6 + len, &nibble); - - if (nibble) - ++len; - - d[5] = mylen << 4 | remotelen; - d[6 + len + 1] = 0xCC; /* TCP/IP over X.25, thanks to Daniela :) */ - - if ((err = x25_exec(card, X25_CONNECT_REQUEST, chan->link, - &d, 7 + len + 1, NULL, 0)) != 0) - clear_bit(--key, (void*)&card->u.x.connection_keys); - else - chan->lcn = -key; - - return err; -} - -/* Place X.25 CONNECT RESPONSE. */ -static int cycx_x25_connect_response(struct cycx_device *card, - struct cycx_x25_channel *chan) -{ - u8 d[8]; - - memset(d, 0, sizeof(d)); - d[0] = d[3] = chan->lcn; - d[2] = 0x10; - d[4] = 0x0F; - d[7] = 0xCC; /* TCP/IP over X.25, thanks Daniela */ - - return x25_exec(card, X25_CONNECT_RESPONSE, chan->link, &d, 8, NULL, 0); -} - -/* Place X.25 DISCONNECT RESPONSE. */ -static int cycx_x25_disconnect_response(struct cycx_device *card, u8 link, - u8 lcn) -{ - char d[5]; - - memset(d, 0, sizeof(d)); - d[0] = d[3] = lcn; - d[2] = 0x10; - d[4] = 0x17; - - return x25_exec(card, X25_DISCONNECT_RESPONSE, link, &d, 5, NULL, 0); -} - -/* Clear X.25 call. */ -static int x25_clear_call(struct cycx_device *card, u8 link, u8 lcn, u8 cause, - u8 diagn) -{ - u8 d[7]; - - memset(d, 0, sizeof(d)); - d[0] = d[3] = lcn; - d[2] = 0x10; - d[4] = 0x13; - d[5] = cause; - d[6] = diagn; - - return x25_exec(card, X25_DISCONNECT_REQUEST, link, d, 7, NULL, 0); -} - -/* Send X.25 data packet. */ -static int cycx_x25_send(struct cycx_device *card, u8 link, u8 lcn, u8 bitm, - int len, void *buf) -{ - u8 d[] = "?\xFF\x10??"; - - d[0] = d[3] = lcn; - d[4] = bitm; - - return x25_exec(card, X25_DATA_REQUEST, link, &d, 5, buf, len); -} - -/* Miscellaneous */ -/* Find network device by its channel number. */ -static struct net_device *cycx_x25_get_dev_by_lcn(struct wan_device *wandev, - s16 lcn) -{ - struct net_device *dev = wandev->dev; - struct cycx_x25_channel *chan; - - while (dev) { - chan = netdev_priv(dev); - - if (chan->lcn == lcn) - break; - dev = chan->slave; - } - return dev; -} - -/* Find network device by its remote dte address. */ -static struct net_device * - cycx_x25_get_dev_by_dte_addr(struct wan_device *wandev, char *dte) -{ - struct net_device *dev = wandev->dev; - struct cycx_x25_channel *chan; - - while (dev) { - chan = netdev_priv(dev); - - if (!strcmp(chan->addr, dte)) - break; - dev = chan->slave; - } - return dev; -} - -/* Initiate connection on the logical channel. - * o for PVC we just get channel configuration - * o for SVCs place an X.25 call - * - * Return: 0 connected - * >0 connection in progress - * <0 failure */ -static int cycx_x25_chan_connect(struct net_device *dev) -{ - struct cycx_x25_channel *chan = netdev_priv(dev); - struct cycx_device *card = chan->card; - - if (chan->svc) { - if (!chan->addr[0]) - return -EINVAL; /* no destination address */ - - dprintk(1, KERN_INFO "%s: placing X.25 call to %s...\n", - card->devname, chan->addr); - - if (x25_place_call(card, chan)) - return -EIO; - - cycx_x25_set_chan_state(dev, WAN_CONNECTING); - return 1; - } else - cycx_x25_set_chan_state(dev, WAN_CONNECTED); - - return 0; -} - -/* Disconnect logical channel. - * o if SVC then clear X.25 call */ -static void cycx_x25_chan_disconnect(struct net_device *dev) -{ - struct cycx_x25_channel *chan = netdev_priv(dev); - - if (chan->svc) { - x25_clear_call(chan->card, chan->link, chan->lcn, 0, 0); - cycx_x25_set_chan_state(dev, WAN_DISCONNECTING); - } else - cycx_x25_set_chan_state(dev, WAN_DISCONNECTED); -} - -/* Called by kernel timer */ -static void cycx_x25_chan_timer(unsigned long d) -{ - struct net_device *dev = (struct net_device *)d; - struct cycx_x25_channel *chan = netdev_priv(dev); - - if (chan->state == WAN_CONNECTED) - cycx_x25_chan_disconnect(dev); - else - pr_err("%s: %s for svc (%s) not connected!\n", - chan->card->devname, __func__, dev->name); -} - -/* Set logical channel state. */ -static void cycx_x25_set_chan_state(struct net_device *dev, u8 state) -{ - struct cycx_x25_channel *chan = netdev_priv(dev); - struct cycx_device *card = chan->card; - unsigned long flags; - char *string_state = NULL; - - spin_lock_irqsave(&card->lock, flags); - - if (chan->state != state) { - if (chan->svc && chan->state == WAN_CONNECTED) - del_timer(&chan->timer); - - switch (state) { - case WAN_CONNECTED: - string_state = "connected!"; - *(__be16*)dev->dev_addr = htons(chan->lcn); - netif_wake_queue(dev); - reset_timer(dev); - - if (chan->protocol == ETH_P_X25) - cycx_x25_chan_send_event(dev, - X25_IFACE_CONNECT); - - break; - case WAN_CONNECTING: - string_state = "connecting..."; - break; - case WAN_DISCONNECTING: - string_state = "disconnecting..."; - break; - case WAN_DISCONNECTED: - string_state = "disconnected!"; - - if (chan->svc) { - *(unsigned short*)dev->dev_addr = 0; - chan->lcn = 0; - } - - if (chan->protocol == ETH_P_X25) - cycx_x25_chan_send_event(dev, - X25_IFACE_DISCONNECT); - - netif_wake_queue(dev); - break; - } - - pr_info("%s: interface %s %s\n", - card->devname, dev->name, string_state); - chan->state = state; - } - - spin_unlock_irqrestore(&card->lock, flags); -} - -/* Send packet on a logical channel. - * When this function is called, tx_skb field of the channel data space - * points to the transmit socket buffer. When transmission is complete, - * release socket buffer and reset 'tbusy' flag. - * - * Return: 0 - transmission complete - * 1 - busy - * - * Notes: - * 1. If packet length is greater than MTU for this channel, we'll fragment - * the packet into 'complete sequence' using M-bit. - * 2. When transmission is complete, an event notification should be issued - * to the router. */ -static int cycx_x25_chan_send(struct net_device *dev, struct sk_buff *skb) -{ - struct cycx_x25_channel *chan = netdev_priv(dev); - struct cycx_device *card = chan->card; - int bitm = 0; /* final packet */ - unsigned len = skb->len; - - if (skb->len > card->wandev.mtu) { - len = card->wandev.mtu; - bitm = 0x10; /* set M-bit (more data) */ - } - - if (cycx_x25_send(card, chan->link, chan->lcn, bitm, len, skb->data)) - return 1; - - if (bitm) { - skb_pull(skb, len); - return 1; - } - - ++chan->ifstats.tx_packets; - chan->ifstats.tx_bytes += len; - - return 0; -} - -/* Send event (connection, disconnection, etc) to X.25 socket layer */ - -static void cycx_x25_chan_send_event(struct net_device *dev, u8 event) -{ - struct sk_buff *skb; - unsigned char *ptr; - - if ((skb = dev_alloc_skb(1)) == NULL) { - pr_err("%s: out of memory\n", __func__); - return; - } - - ptr = skb_put(skb, 1); - *ptr = event; - - skb->protocol = x25_type_trans(skb, dev); - netif_rx(skb); -} - -/* Convert line speed in bps to a number used by cyclom 2x code. */ -static u8 bps_to_speed_code(u32 bps) -{ - u8 number = 0; /* defaults to the lowest (1200) speed ;> */ - - if (bps >= 512000) number = 8; - else if (bps >= 256000) number = 7; - else if (bps >= 64000) number = 6; - else if (bps >= 38400) number = 5; - else if (bps >= 19200) number = 4; - else if (bps >= 9600) number = 3; - else if (bps >= 4800) number = 2; - else if (bps >= 2400) number = 1; - - return number; -} - -/* log base 2 */ -static u8 cycx_log2(u32 n) -{ - u8 log = 0; - - if (!n) - return 0; - - while (n > 1) { - n >>= 1; - ++log; - } - - return log; -} - -/* Convert decimal string to unsigned integer. - * If len != 0 then only 'len' characters of the string are converted. */ -static unsigned dec_to_uint(u8 *str, int len) -{ - unsigned val = 0; - - if (!len) - len = strlen(str); - - for (; len && isdigit(*str); ++str, --len) - val = (val * 10) + (*str - (unsigned) '0'); - - return val; -} - -static void reset_timer(struct net_device *dev) -{ - struct cycx_x25_channel *chan = netdev_priv(dev); - - if (chan->svc) - mod_timer(&chan->timer, jiffies+chan->idle_tmout*HZ); -} -#ifdef CYCLOMX_X25_DEBUG -static void cycx_x25_dump_config(struct cycx_x25_config *conf) -{ - pr_info("X.25 configuration\n"); - pr_info("-----------------\n"); - pr_info("link number=%d\n", conf->link); - pr_info("line speed=%d\n", conf->speed); - pr_info("clock=%sternal\n", conf->clock == 8 ? "Ex" : "In"); - pr_info("# level 2 retransm.=%d\n", conf->n2); - pr_info("level 2 window=%d\n", conf->n2win); - pr_info("level 3 window=%d\n", conf->n3win); - pr_info("# logical channels=%d\n", conf->nvc); - pr_info("level 3 pkt len=%d\n", conf->pktlen); - pr_info("my address=%d\n", conf->locaddr); - pr_info("remote address=%d\n", conf->remaddr); - pr_info("t1=%d seconds\n", conf->t1); - pr_info("t2=%d seconds\n", conf->t2); - pr_info("t21=%d seconds\n", conf->t21); - pr_info("# PVCs=%d\n", conf->npvc); - pr_info("t23=%d seconds\n", conf->t23); - pr_info("flags=0x%x\n", conf->flags); -} - -static void cycx_x25_dump_stats(struct cycx_x25_stats *stats) -{ - pr_info("X.25 statistics\n"); - pr_info("--------------\n"); - pr_info("rx_crc_errors=%d\n", stats->rx_crc_errors); - pr_info("rx_over_errors=%d\n", stats->rx_over_errors); - pr_info("n2_tx_frames=%d\n", stats->n2_tx_frames); - pr_info("n2_rx_frames=%d\n", stats->n2_rx_frames); - pr_info("tx_timeouts=%d\n", stats->tx_timeouts); - pr_info("rx_timeouts=%d\n", stats->rx_timeouts); - pr_info("n3_tx_packets=%d\n", stats->n3_tx_packets); - pr_info("n3_rx_packets=%d\n", stats->n3_rx_packets); - pr_info("tx_aborts=%d\n", stats->tx_aborts); - pr_info("rx_aborts=%d\n", stats->rx_aborts); -} - -static void cycx_x25_dump_devs(struct wan_device *wandev) -{ - struct net_device *dev = wandev->dev; - - pr_info("X.25 dev states\n"); - pr_info("name: addr: txoff: protocol:\n"); - pr_info("---------------------------------------\n"); - - while(dev) { - struct cycx_x25_channel *chan = netdev_priv(dev); - - pr_info("%-5.5s %-15.15s %d ETH_P_%s\n", - chan->name, chan->addr, netif_queue_stopped(dev), - chan->protocol == ETH_P_IP ? "IP" : "X25"); - dev = chan->slave; - } -} - -#endif /* CYCLOMX_X25_DEBUG */ -/* End */ diff --git a/include/linux/cyclomx.h b/include/linux/cyclomx.h deleted file mode 100644 index b88f7f428e58..000000000000 --- a/include/linux/cyclomx.h +++ /dev/null @@ -1,77 +0,0 @@ -#ifndef _CYCLOMX_H -#define _CYCLOMX_H -/* -* cyclomx.h Cyclom 2X WAN Link Driver. -* User-level API definitions. -* -* Author: Arnaldo Carvalho de Melo -* -* Copyright: (c) 1998-2003 Arnaldo Carvalho de Melo -* -* Based on wanpipe.h by Gene Kozin -* -* This program is free software; you can redistribute it and/or -* modify it under the terms of the GNU General Public License -* as published by the Free Software Foundation; either version -* 2 of the License, or (at your option) any later version. -* ============================================================================ -* 2000/07/13 acme remove crap #if KERNEL_VERSION > blah -* 2000/01/21 acme rename cyclomx_open to cyclomx_mod_inc_use_count -* and cyclomx_close to cyclomx_mod_dec_use_count -* 1999/05/19 acme wait_queue_head_t wait_stats(support for 2.3.*) -* 1999/01/03 acme judicious use of data types -* 1998/12/27 acme cleanup: PACKED not needed -* 1998/08/08 acme Version 0.0.1 -*/ - -#include -#include - -#ifdef __KERNEL__ -/* Kernel Interface */ - -#include /* Cyclom 2X support module API definitions */ -#include /* Cyclom 2X firmware module definitions */ -#ifdef CONFIG_CYCLOMX_X25 -#include -#endif - -/* Adapter Data Space. - * This structure is needed because we handle multiple cards, otherwise - * static data would do it. - */ -struct cycx_device { - char devname[WAN_DRVNAME_SZ + 1];/* card name */ - struct cycx_hw hw; /* hardware configuration */ - struct wan_device wandev; /* WAN device data space */ - u32 state_tick; /* link state timestamp */ - spinlock_t lock; - char in_isr; /* interrupt-in-service flag */ - char buff_int_mode_unbusy; /* flag for carrying out dev_tint */ - wait_queue_head_t wait_stats; /* to wait for the STATS indication */ - void __iomem *mbox; /* -> mailbox */ - void (*isr)(struct cycx_device* card); /* interrupt service routine */ - int (*exec)(struct cycx_device* card, void* u_cmd, void* u_data); - union { -#ifdef CONFIG_CYCLOMX_X25 - struct { /* X.25 specific data */ - u32 lo_pvc; - u32 hi_pvc; - u32 lo_svc; - u32 hi_svc; - struct cycx_x25_stats stats; - spinlock_t lock; - u32 connection_keys; - } x; -#endif - } u; -}; - -/* Public Functions */ -void cycx_set_state(struct cycx_device *card, int state); - -#ifdef CONFIG_CYCLOMX_X25 -int cycx_x25_wan_init(struct cycx_device *card, wandev_conf_t *conf); -#endif -#endif /* __KERNEL__ */ -#endif /* _CYCLOMX_H */ diff --git a/include/linux/cycx_drv.h b/include/linux/cycx_drv.h deleted file mode 100644 index 12fe6b0bfcff..000000000000 --- a/include/linux/cycx_drv.h +++ /dev/null @@ -1,64 +0,0 @@ -/* -* cycx_drv.h CYCX Support Module. Kernel API Definitions. -* -* Author: Arnaldo Carvalho de Melo -* -* Copyright: (c) 1998-2003 Arnaldo Carvalho de Melo -* -* Based on sdladrv.h by Gene Kozin -* -* This program is free software; you can redistribute it and/or -* modify it under the terms of the GNU General Public License -* as published by the Free Software Foundation; either version -* 2 of the License, or (at your option) any later version. -* ============================================================================ -* 1999/10/23 acme cycxhw_t cleanup -* 1999/01/03 acme more judicious use of data types... -* uclong, ucchar, etc deleted, the u8, u16, u32 -* types are the portable way to go. -* 1999/01/03 acme judicious use of data types... u16, u32, etc -* 1998/12/26 acme FIXED_BUFFERS, CONF_OFFSET, -* removal of cy_read{bwl} -* 1998/08/08 acme Initial version. -*/ -#ifndef _CYCX_DRV_H -#define _CYCX_DRV_H - -#define CYCX_WINDOWSIZE 0x4000 /* default dual-port memory window size */ -#define GEN_CYCX_INTR 0x02 -#define RST_ENABLE 0x04 -#define START_CPU 0x06 -#define RST_DISABLE 0x08 -#define FIXED_BUFFERS 0x08 -#define TEST_PATTERN 0xaa55 -#define CMD_OFFSET 0x20 -#define CONF_OFFSET 0x0380 -#define RESET_OFFSET 0x3c00 /* For reset file load */ -#define DATA_OFFSET 0x0100 /* For code and data files load */ -#define START_OFFSET 0x3ff0 /* 80186 starts here */ - -/** - * struct cycx_hw - Adapter hardware configuration - * @fwid - firmware ID - * @irq - interrupt request level - * @dpmbase - dual-port memory base - * @dpmsize - dual-port memory size - * @reserved - reserved for future use - */ -struct cycx_hw { - u32 fwid; - int irq; - void __iomem *dpmbase; - u32 dpmsize; - u32 reserved[5]; -}; - -/* Function Prototypes */ -extern int cycx_setup(struct cycx_hw *hw, void *sfm, u32 len, unsigned long base); -extern int cycx_down(struct cycx_hw *hw); -extern int cycx_peek(struct cycx_hw *hw, u32 addr, void *buf, u32 len); -extern int cycx_poke(struct cycx_hw *hw, u32 addr, void *buf, u32 len); -extern int cycx_exec(void __iomem *addr); - -extern void cycx_intr(struct cycx_hw *hw); -#endif /* _CYCX_DRV_H */ diff --git a/include/linux/wanrouter.h b/include/linux/wanrouter.h index cec4b4159767..8198a63cf459 100644 --- a/include/linux/wanrouter.h +++ b/include/linux/wanrouter.h @@ -1,129 +1,10 @@ -/***************************************************************************** -* wanrouter.h Definitions for the WAN Multiprotocol Router Module. -* This module provides API and common services for WAN Link -* Drivers and is completely hardware-independent. -* -* Author: Nenad Corbic -* Gideon Hack -* Additions: Arnaldo Melo -* -* Copyright: (c) 1995-2000 Sangoma Technologies Inc. -* -* This program is free software; you can redistribute it and/or -* modify it under the terms of the GNU General Public License -* as published by the Free Software Foundation; either version -* 2 of the License, or (at your option) any later version. -* ============================================================================ -* Jul 21, 2000 Nenad Corbic Added WAN_FT1_READY State -* Feb 24, 2000 Nenad Corbic Added support for socket based x25api -* Jan 28, 2000 Nenad Corbic Added support for the ASYNC protocol. -* Oct 04, 1999 Nenad Corbic Updated for 2.1.0 release -* Jun 02, 1999 Gideon Hack Added support for the S514 adapter. -* May 23, 1999 Arnaldo Melo Added local_addr to wanif_conf_t -* WAN_DISCONNECTING state added -* Jul 20, 1998 David Fong Added Inverse ARP options to 'wanif_conf_t' -* Jun 12, 1998 David Fong Added Cisco HDLC support. -* Dec 16, 1997 Jaspreet Singh Moved 'enable_IPX' and 'network_number' to -* 'wanif_conf_t' -* Dec 05, 1997 Jaspreet Singh Added 'pap', 'chap' to 'wanif_conf_t' -* Added 'authenticator' to 'wan_ppp_conf_t' -* Nov 06, 1997 Jaspreet Singh Changed Router Driver version to 1.1 from 1.0 -* Oct 20, 1997 Jaspreet Singh Added 'cir','bc','be' and 'mc' to 'wanif_conf_t' -* Added 'enable_IPX' and 'network_number' to -* 'wan_device_t'. Also added defines for -* UDP PACKET TYPE, Interrupt test, critical values -* for RACE conditions. -* Oct 05, 1997 Jaspreet Singh Added 'dlci_num' and 'dlci[100]' to -* 'wan_fr_conf_t' to configure a list of dlci(s) -* for a NODE -* Jul 07, 1997 Jaspreet Singh Added 'ttl' to 'wandev_conf_t' & 'wan_device_t' -* May 29, 1997 Jaspreet Singh Added 'tx_int_enabled' to 'wan_device_t' -* May 21, 1997 Jaspreet Singh Added 'udp_port' to 'wan_device_t' -* Apr 25, 1997 Farhan Thawar Added 'udp_port' to 'wandev_conf_t' -* Jan 16, 1997 Gene Kozin router_devlist made public -* Jan 02, 1997 Gene Kozin Initial version (based on wanpipe.h). -*****************************************************************************/ +/* + * wanrouter.h Legacy declarations kept around until X25 is removed + */ + #ifndef _ROUTER_H #define _ROUTER_H #include -/****** Kernel Interface ****************************************************/ - -#include /* support for device drivers */ -#include /* proc filesystem pragmatics */ -#include /* support for network drivers */ -#include /* Support for SMP Locking */ - -/*---------------------------------------------------------------------------- - * WAN device data space. - */ -struct wan_device { - unsigned magic; /* magic number */ - char* name; /* -> WAN device name (ASCIIZ) */ - void* private; /* -> driver private data */ - unsigned config_id; /* Configuration ID */ - /****** hardware configuration ******/ - unsigned ioport; /* adapter I/O port base #1 */ - char S514_cpu_no[1]; /* PCI CPU Number */ - unsigned char S514_slot_no; /* PCI Slot Number */ - unsigned long maddr; /* dual-port memory address */ - unsigned msize; /* dual-port memory size */ - int irq; /* interrupt request level */ - int dma; /* DMA request level */ - unsigned bps; /* data transfer rate */ - unsigned mtu; /* max physical transmit unit size */ - unsigned udp_port; /* UDP port for management */ - unsigned char ttl; /* Time To Live for UDP security */ - unsigned enable_tx_int; /* Transmit Interrupt enabled or not */ - char interface; /* RS-232/V.35, etc. */ - char clocking; /* external/internal */ - char line_coding; /* NRZ/NRZI/FM0/FM1, etc. */ - char station; /* DTE/DCE, primary/secondary, etc. */ - char connection; /* permanent/switched/on-demand */ - char signalling; /* Signalling RS232 or V35 */ - char read_mode; /* read mode: Polling or interrupt */ - char new_if_cnt; /* Number of interfaces per wanpipe */ - char del_if_cnt; /* Number of times del_if() gets called */ - unsigned char piggyback; /* Piggibacking a port */ - unsigned hw_opt[4]; /* other hardware options */ - /****** status and statistics *******/ - char state; /* device state */ - char api_status; /* device api status */ - struct net_device_stats stats; /* interface statistics */ - unsigned reserved[16]; /* reserved for future use */ - unsigned long critical; /* critical section flag */ - spinlock_t lock; /* Support for SMP Locking */ - - /****** device management methods ***/ - int (*setup) (struct wan_device *wandev, wandev_conf_t *conf); - int (*shutdown) (struct wan_device *wandev); - int (*update) (struct wan_device *wandev); - int (*ioctl) (struct wan_device *wandev, unsigned cmd, - unsigned long arg); - int (*new_if)(struct wan_device *wandev, struct net_device *dev, - wanif_conf_t *conf); - int (*del_if)(struct wan_device *wandev, struct net_device *dev); - /****** maintained by the router ****/ - struct wan_device* next; /* -> next device */ - struct net_device* dev; /* list of network interfaces */ - unsigned ndev; /* number of interfaces */ - struct proc_dir_entry *dent; /* proc filesystem entry */ -}; - -/* Public functions available for device drivers */ -extern int register_wan_device(struct wan_device *wandev); -extern int unregister_wan_device(char *name); - -/* Proc interface functions. These must not be called by the drivers! */ -extern int wanrouter_proc_init(void); -extern void wanrouter_proc_cleanup(void); -extern int wanrouter_proc_add(struct wan_device *wandev); -extern int wanrouter_proc_delete(struct wan_device *wandev); -extern long wanrouter_ioctl(struct file *file, unsigned int cmd, unsigned long arg); - -/* Public Data */ -/* list of registered devices */ -extern struct wan_device *wanrouter_router_devlist; - #endif /* _ROUTER_H */ diff --git a/include/uapi/linux/wanrouter.h b/include/uapi/linux/wanrouter.h index 7617df2833d5..498d6c12c666 100644 --- a/include/uapi/linux/wanrouter.h +++ b/include/uapi/linux/wanrouter.h @@ -1,363 +1,9 @@ -/***************************************************************************** -* wanrouter.h Definitions for the WAN Multiprotocol Router Module. -* This module provides API and common services for WAN Link -* Drivers and is completely hardware-independent. -* -* Author: Nenad Corbic -* Gideon Hack -* Additions: Arnaldo Melo -* -* Copyright: (c) 1995-2000 Sangoma Technologies Inc. -* -* This program is free software; you can redistribute it and/or -* modify it under the terms of the GNU General Public License -* as published by the Free Software Foundation; either version -* 2 of the License, or (at your option) any later version. -* ============================================================================ -* Jul 21, 2000 Nenad Corbic Added WAN_FT1_READY State -* Feb 24, 2000 Nenad Corbic Added support for socket based x25api -* Jan 28, 2000 Nenad Corbic Added support for the ASYNC protocol. -* Oct 04, 1999 Nenad Corbic Updated for 2.1.0 release -* Jun 02, 1999 Gideon Hack Added support for the S514 adapter. -* May 23, 1999 Arnaldo Melo Added local_addr to wanif_conf_t -* WAN_DISCONNECTING state added -* Jul 20, 1998 David Fong Added Inverse ARP options to 'wanif_conf_t' -* Jun 12, 1998 David Fong Added Cisco HDLC support. -* Dec 16, 1997 Jaspreet Singh Moved 'enable_IPX' and 'network_number' to -* 'wanif_conf_t' -* Dec 05, 1997 Jaspreet Singh Added 'pap', 'chap' to 'wanif_conf_t' -* Added 'authenticator' to 'wan_ppp_conf_t' -* Nov 06, 1997 Jaspreet Singh Changed Router Driver version to 1.1 from 1.0 -* Oct 20, 1997 Jaspreet Singh Added 'cir','bc','be' and 'mc' to 'wanif_conf_t' -* Added 'enable_IPX' and 'network_number' to -* 'wan_device_t'. Also added defines for -* UDP PACKET TYPE, Interrupt test, critical values -* for RACE conditions. -* Oct 05, 1997 Jaspreet Singh Added 'dlci_num' and 'dlci[100]' to -* 'wan_fr_conf_t' to configure a list of dlci(s) -* for a NODE -* Jul 07, 1997 Jaspreet Singh Added 'ttl' to 'wandev_conf_t' & 'wan_device_t' -* May 29, 1997 Jaspreet Singh Added 'tx_int_enabled' to 'wan_device_t' -* May 21, 1997 Jaspreet Singh Added 'udp_port' to 'wan_device_t' -* Apr 25, 1997 Farhan Thawar Added 'udp_port' to 'wandev_conf_t' -* Jan 16, 1997 Gene Kozin router_devlist made public -* Jan 02, 1997 Gene Kozin Initial version (based on wanpipe.h). -*****************************************************************************/ - -#ifndef _UAPI_ROUTER_H -#define _UAPI_ROUTER_H - -#define ROUTER_NAME "wanrouter" /* in case we ever change it */ -#define ROUTER_VERSION 1 /* version number */ -#define ROUTER_RELEASE 1 /* release (minor version) number */ -#define ROUTER_IOCTL 'W' /* for IOCTL calls */ -#define ROUTER_MAGIC 0x524D4157L /* signature: 'WANR' reversed */ - -/* IOCTL codes for /proc/router/ entries (up to 255) */ -enum router_ioctls -{ - ROUTER_SETUP = ROUTER_IOCTL<<8, /* configure device */ - ROUTER_DOWN, /* shut down device */ - ROUTER_STAT, /* get device status */ - ROUTER_IFNEW, /* add interface */ - ROUTER_IFDEL, /* delete interface */ - ROUTER_IFSTAT, /* get interface status */ - ROUTER_USER = (ROUTER_IOCTL<<8)+16, /* driver-specific calls */ - ROUTER_USER_MAX = (ROUTER_IOCTL<<8)+31 -}; - -/* identifiers for displaying proc file data for dual port adapters */ -#define PROC_DATA_PORT_0 0x8000 /* the data is for port 0 */ -#define PROC_DATA_PORT_1 0x8001 /* the data is for port 1 */ - -/* NLPID for packet encapsulation (ISO/IEC TR 9577) */ -#define NLPID_IP 0xCC /* Internet Protocol Datagram */ -#define NLPID_SNAP 0x80 /* IEEE Subnetwork Access Protocol */ -#define NLPID_CLNP 0x81 /* ISO/IEC 8473 */ -#define NLPID_ESIS 0x82 /* ISO/IEC 9542 */ -#define NLPID_ISIS 0x83 /* ISO/IEC ISIS */ -#define NLPID_Q933 0x08 /* CCITT Q.933 */ - -/* Miscellaneous */ -#define WAN_IFNAME_SZ 15 /* max length of the interface name */ -#define WAN_DRVNAME_SZ 15 /* max length of the link driver name */ -#define WAN_ADDRESS_SZ 31 /* max length of the WAN media address */ -#define USED_BY_FIELD 8 /* max length of the used by field */ - -/* Defines for UDP PACKET TYPE */ -#define UDP_PTPIPE_TYPE 0x01 -#define UDP_FPIPE_TYPE 0x02 -#define UDP_CPIPE_TYPE 0x03 -#define UDP_DRVSTATS_TYPE 0x04 -#define UDP_INVALID_TYPE 0x05 - -/* Command return code */ -#define CMD_OK 0 /* normal firmware return code */ -#define CMD_TIMEOUT 0xFF /* firmware command timed out */ - -/* UDP Packet Management */ -#define UDP_PKT_FRM_STACK 0x00 -#define UDP_PKT_FRM_NETWORK 0x01 - -/* Maximum interrupt test counter */ -#define MAX_INTR_TEST_COUNTER 100 - -/* Critical Values for RACE conditions*/ -#define CRITICAL_IN_ISR 0xA1 -#define CRITICAL_INTR_HANDLED 0xB1 - -/****** Data Types **********************************************************/ - -/*---------------------------------------------------------------------------- - * X.25-specific link-level configuration. - */ -typedef struct wan_x25_conf -{ - unsigned lo_pvc; /* lowest permanent circuit number */ - unsigned hi_pvc; /* highest permanent circuit number */ - unsigned lo_svc; /* lowest switched circuit number */ - unsigned hi_svc; /* highest switched circuit number */ - unsigned hdlc_window; /* HDLC window size (1..7) */ - unsigned pkt_window; /* X.25 packet window size (1..7) */ - unsigned t1; /* HDLC timer T1, sec (1..30) */ - unsigned t2; /* HDLC timer T2, sec (0..29) */ - unsigned t4; /* HDLC supervisory frame timer = T4 * T1 */ - unsigned n2; /* HDLC retransmission limit (1..30) */ - unsigned t10_t20; /* X.25 RESTART timeout, sec (1..255) */ - unsigned t11_t21; /* X.25 CALL timeout, sec (1..255) */ - unsigned t12_t22; /* X.25 RESET timeout, sec (1..255) */ - unsigned t13_t23; /* X.25 CLEAR timeout, sec (1..255) */ - unsigned t16_t26; /* X.25 INTERRUPT timeout, sec (1..255) */ - unsigned t28; /* X.25 REGISTRATION timeout, sec (1..255) */ - unsigned r10_r20; /* RESTART retransmission limit (0..250) */ - unsigned r12_r22; /* RESET retransmission limit (0..250) */ - unsigned r13_r23; /* CLEAR retransmission limit (0..250) */ - unsigned ccitt_compat; /* compatibility mode: 1988/1984/1980 */ - unsigned x25_conf_opt; /* User defined x25 config optoins */ - unsigned char LAPB_hdlc_only; /* Run in HDLC only mode */ - unsigned char logging; /* Control connection logging */ - unsigned char oob_on_modem; /* Whether to send modem status to the user app */ -} wan_x25_conf_t; - -/*---------------------------------------------------------------------------- - * Frame relay specific link-level configuration. - */ -typedef struct wan_fr_conf -{ - unsigned signalling; /* local in-channel signalling type */ - unsigned t391; /* link integrity verification timer */ - unsigned t392; /* polling verification timer */ - unsigned n391; /* full status polling cycle counter */ - unsigned n392; /* error threshold counter */ - unsigned n393; /* monitored events counter */ - unsigned dlci_num; /* number of DLCs (access node) */ - unsigned dlci[100]; /* List of all DLCIs */ -} wan_fr_conf_t; - -/*---------------------------------------------------------------------------- - * PPP-specific link-level configuration. - */ -typedef struct wan_ppp_conf -{ - unsigned restart_tmr; /* restart timer */ - unsigned auth_rsrt_tmr; /* authentication timer */ - unsigned auth_wait_tmr; /* authentication timer */ - unsigned mdm_fail_tmr; /* modem failure timer */ - unsigned dtr_drop_tmr; /* DTR drop timer */ - unsigned connect_tmout; /* connection timeout */ - unsigned conf_retry; /* max. retry */ - unsigned term_retry; /* max. retry */ - unsigned fail_retry; /* max. retry */ - unsigned auth_retry; /* max. retry */ - unsigned auth_options; /* authentication opt. */ - unsigned ip_options; /* IP options */ - char authenticator; /* AUTHENTICATOR or not */ - char ip_mode; /* Static/Host/Peer */ -} wan_ppp_conf_t; - -/*---------------------------------------------------------------------------- - * CHDLC-specific link-level configuration. - */ -typedef struct wan_chdlc_conf -{ - unsigned char ignore_dcd; /* Protocol options: */ - unsigned char ignore_cts; /* Ignore these to determine */ - unsigned char ignore_keepalive; /* link status (Yes or No) */ - unsigned char hdlc_streaming; /* hdlc_streaming mode (Y/N) */ - unsigned char receive_only; /* no transmit buffering (Y/N) */ - unsigned keepalive_tx_tmr; /* transmit keepalive timer */ - unsigned keepalive_rx_tmr; /* receive keepalive timer */ - unsigned keepalive_err_margin; /* keepalive_error_tolerance */ - unsigned slarp_timer; /* SLARP request timer */ -} wan_chdlc_conf_t; - - -/*---------------------------------------------------------------------------- - * WAN device configuration. Passed to ROUTER_SETUP IOCTL. - */ -typedef struct wandev_conf -{ - unsigned magic; /* magic number (for verification) */ - unsigned config_id; /* configuration structure identifier */ - /****** hardware configuration ******/ - unsigned ioport; /* adapter I/O port base */ - unsigned long maddr; /* dual-port memory address */ - unsigned msize; /* dual-port memory size */ - int irq; /* interrupt request level */ - int dma; /* DMA request level */ - char S514_CPU_no[1]; /* S514 PCI adapter CPU number ('A' or 'B') */ - unsigned PCI_slot_no; /* S514 PCI adapter slot number */ - char auto_pci_cfg; /* S515 PCI automatic slot detection */ - char comm_port; /* Communication Port (PRI=0, SEC=1) */ - unsigned bps; /* data transfer rate */ - unsigned mtu; /* maximum transmit unit size */ - unsigned udp_port; /* UDP port for management */ - unsigned char ttl; /* Time To Live for UDP security */ - unsigned char ft1; /* FT1 Configurator Option */ - char interface; /* RS-232/V.35, etc. */ - char clocking; /* external/internal */ - char line_coding; /* NRZ/NRZI/FM0/FM1, etc. */ - char station; /* DTE/DCE, primary/secondary, etc. */ - char connection; /* permanent/switched/on-demand */ - char read_mode; /* read mode: Polling or interrupt */ - char receive_only; /* disable tx buffers */ - char tty; /* Create a fake tty device */ - unsigned tty_major; /* Major number for wanpipe tty device */ - unsigned tty_minor; /* Minor number for wanpipe tty device */ - unsigned tty_mode; /* TTY operation mode SYNC or ASYNC */ - char backup; /* Backup Mode */ - unsigned hw_opt[4]; /* other hardware options */ - unsigned reserved[4]; - /****** arbitrary data ***************/ - unsigned data_size; /* data buffer size */ - void* data; /* data buffer, e.g. firmware */ - union /****** protocol-specific ************/ - { - wan_x25_conf_t x25; /* X.25 configuration */ - wan_ppp_conf_t ppp; /* PPP configuration */ - wan_fr_conf_t fr; /* frame relay configuration */ - wan_chdlc_conf_t chdlc; /* Cisco HDLC configuration */ - } u; -} wandev_conf_t; - -/* 'config_id' definitions */ -#define WANCONFIG_X25 101 /* X.25 link */ -#define WANCONFIG_FR 102 /* frame relay link */ -#define WANCONFIG_PPP 103 /* synchronous PPP link */ -#define WANCONFIG_CHDLC 104 /* Cisco HDLC Link */ -#define WANCONFIG_BSC 105 /* BiSync Streaming */ -#define WANCONFIG_HDLC 106 /* HDLC Support */ -#define WANCONFIG_MPPP 107 /* Multi Port PPP over RAW CHDLC */ - /* - * Configuration options defines. + * wanrouter.h Legacy declarations kept around until X25 is removed */ -/* general options */ -#define WANOPT_OFF 0 -#define WANOPT_ON 1 -#define WANOPT_NO 0 -#define WANOPT_YES 1 - -/* intercace options */ -#define WANOPT_RS232 0 -#define WANOPT_V35 1 - -/* data encoding options */ -#define WANOPT_NRZ 0 -#define WANOPT_NRZI 1 -#define WANOPT_FM0 2 -#define WANOPT_FM1 3 - -/* link type options */ -#define WANOPT_POINTTOPOINT 0 /* RTS always active */ -#define WANOPT_MULTIDROP 1 /* RTS is active when transmitting */ - -/* clocking options */ -#define WANOPT_EXTERNAL 0 -#define WANOPT_INTERNAL 1 - -/* station options */ -#define WANOPT_DTE 0 -#define WANOPT_DCE 1 -#define WANOPT_CPE 0 -#define WANOPT_NODE 1 -#define WANOPT_SECONDARY 0 -#define WANOPT_PRIMARY 1 - -/* connection options */ -#define WANOPT_PERMANENT 0 /* DTR always active */ -#define WANOPT_SWITCHED 1 /* use DTR to setup link (dial-up) */ -#define WANOPT_ONDEMAND 2 /* activate DTR only before sending */ - -/* frame relay in-channel signalling */ -#define WANOPT_FR_ANSI 1 /* ANSI T1.617 Annex D */ -#define WANOPT_FR_Q933 2 /* ITU Q.933A */ -#define WANOPT_FR_LMI 3 /* LMI */ - -/* PPP IP Mode Options */ -#define WANOPT_PPP_STATIC 0 -#define WANOPT_PPP_HOST 1 -#define WANOPT_PPP_PEER 2 - -/* ASY Mode Options */ -#define WANOPT_ONE 1 -#define WANOPT_TWO 2 -#define WANOPT_ONE_AND_HALF 3 - -#define WANOPT_NONE 0 -#define WANOPT_ODD 1 -#define WANOPT_EVEN 2 - -/* CHDLC Protocol Options */ -/* DF Commented out for now. - -#define WANOPT_CHDLC_NO_DCD IGNORE_DCD_FOR_LINK_STAT -#define WANOPT_CHDLC_NO_CTS IGNORE_CTS_FOR_LINK_STAT -#define WANOPT_CHDLC_NO_KEEPALIVE IGNORE_KPALV_FOR_LINK_STAT -*/ - -/* Port options */ -#define WANOPT_PRI 0 -#define WANOPT_SEC 1 -/* read mode */ -#define WANOPT_INTR 0 -#define WANOPT_POLL 1 - -#define WANOPT_TTY_SYNC 0 -#define WANOPT_TTY_ASYNC 1 -/*---------------------------------------------------------------------------- - * WAN Link Status Info (for ROUTER_STAT IOCTL). - */ -typedef struct wandev_stat -{ - unsigned state; /* link state */ - unsigned ndev; /* number of configured interfaces */ - - /* link/interface configuration */ - unsigned connection; /* permanent/switched/on-demand */ - unsigned media_type; /* Frame relay/PPP/X.25/SDLC, etc. */ - unsigned mtu; /* max. transmit unit for this device */ - - /* physical level statistics */ - unsigned modem_status; /* modem status */ - unsigned rx_frames; /* received frames count */ - unsigned rx_overruns; /* receiver overrun error count */ - unsigned rx_crc_err; /* receive CRC error count */ - unsigned rx_aborts; /* received aborted frames count */ - unsigned rx_bad_length; /* unexpetedly long/short frames count */ - unsigned rx_dropped; /* frames discarded at device level */ - unsigned tx_frames; /* transmitted frames count */ - unsigned tx_underruns; /* aborted transmissions (underruns) count */ - unsigned tx_timeouts; /* transmission timeouts */ - unsigned tx_rejects; /* other transmit errors */ - - /* media level statistics */ - unsigned rx_bad_format; /* frames with invalid format */ - unsigned rx_bad_addr; /* frames with invalid media address */ - unsigned tx_retries; /* frames re-transmitted */ - unsigned reserved[16]; /* reserved for future use */ -} wandev_stat_t; +#ifndef _UAPI_ROUTER_H +#define _UAPI_ROUTER_H /* 'state' defines */ enum wan_states @@ -365,88 +11,7 @@ enum wan_states WAN_UNCONFIGURED, /* link/channel is not configured */ WAN_DISCONNECTED, /* link/channel is disconnected */ WAN_CONNECTING, /* connection is in progress */ - WAN_CONNECTED, /* link/channel is operational */ - WAN_LIMIT, /* for verification only */ - WAN_DUALPORT, /* for Dual Port cards */ - WAN_DISCONNECTING, - WAN_FT1_READY /* FT1 Configurator Ready */ + WAN_CONNECTED /* link/channel is operational */ }; -enum { - WAN_LOCAL_IP, - WAN_POINTOPOINT_IP, - WAN_NETMASK_IP, - WAN_BROADCAST_IP -}; - -/* 'modem_status' masks */ -#define WAN_MODEM_CTS 0x0001 /* CTS line active */ -#define WAN_MODEM_DCD 0x0002 /* DCD line active */ -#define WAN_MODEM_DTR 0x0010 /* DTR line active */ -#define WAN_MODEM_RTS 0x0020 /* RTS line active */ - -/*---------------------------------------------------------------------------- - * WAN interface (logical channel) configuration (for ROUTER_IFNEW IOCTL). - */ -typedef struct wanif_conf -{ - unsigned magic; /* magic number */ - unsigned config_id; /* configuration identifier */ - char name[WAN_IFNAME_SZ+1]; /* interface name, ASCIIZ */ - char addr[WAN_ADDRESS_SZ+1]; /* media address, ASCIIZ */ - char usedby[USED_BY_FIELD]; /* used by API or WANPIPE */ - unsigned idle_timeout; /* sec, before disconnecting */ - unsigned hold_timeout; /* sec, before re-connecting */ - unsigned cir; /* Committed Information Rate fwd,bwd*/ - unsigned bc; /* Committed Burst Size fwd, bwd */ - unsigned be; /* Excess Burst Size fwd, bwd */ - unsigned char enable_IPX; /* Enable or Disable IPX */ - unsigned char inarp; /* Send Inverse ARP requests Y/N */ - unsigned inarp_interval; /* sec, between InARP requests */ - unsigned long network_number; /* Network Number for IPX */ - char mc; /* Multicast on or off */ - char local_addr[WAN_ADDRESS_SZ+1];/* local media address, ASCIIZ */ - unsigned char port; /* board port */ - unsigned char protocol; /* prococol used in this channel (TCPOX25 or X25) */ - char pap; /* PAP enabled or disabled */ - char chap; /* CHAP enabled or disabled */ - unsigned char userid[511]; /* List of User Id */ - unsigned char passwd[511]; /* List of passwords */ - unsigned char sysname[31]; /* Name of the system */ - unsigned char ignore_dcd; /* Protocol options: */ - unsigned char ignore_cts; /* Ignore these to determine */ - unsigned char ignore_keepalive; /* link status (Yes or No) */ - unsigned char hdlc_streaming; /* Hdlc streaming mode (Y/N) */ - unsigned keepalive_tx_tmr; /* transmit keepalive timer */ - unsigned keepalive_rx_tmr; /* receive keepalive timer */ - unsigned keepalive_err_margin; /* keepalive_error_tolerance */ - unsigned slarp_timer; /* SLARP request timer */ - unsigned char ttl; /* Time To Live for UDP security */ - char interface; /* RS-232/V.35, etc. */ - char clocking; /* external/internal */ - unsigned bps; /* data transfer rate */ - unsigned mtu; /* maximum transmit unit size */ - unsigned char if_down; /* brind down interface when disconnected */ - unsigned char gateway; /* Is this interface a gateway */ - unsigned char true_if_encoding; /* Set the dev->type to true board protocol */ - - unsigned char asy_data_trans; /* async API options */ - unsigned char rts_hs_for_receive; /* async Protocol options */ - unsigned char xon_xoff_hs_for_receive; - unsigned char xon_xoff_hs_for_transmit; - unsigned char dcd_hs_for_transmit; - unsigned char cts_hs_for_transmit; - unsigned char async_mode; - unsigned tx_bits_per_char; - unsigned rx_bits_per_char; - unsigned stop_bits; - unsigned char parity; - unsigned break_timer; - unsigned inter_char_timer; - unsigned rx_complete_length; - unsigned xon_char; - unsigned xoff_char; - unsigned char receive_only; /* no transmit buffering (Y/N) */ -} wanif_conf_t; - #endif /* _UAPI_ROUTER_H */ diff --git a/net/wanrouter/Kconfig b/net/wanrouter/Kconfig deleted file mode 100644 index a157a2e64e18..000000000000 --- a/net/wanrouter/Kconfig +++ /dev/null @@ -1,27 +0,0 @@ -# -# Configuration for WAN router -# - -config WAN_ROUTER - tristate "WAN router (DEPRECATED)" - depends on EXPERIMENTAL - ---help--- - Wide Area Networks (WANs), such as X.25, frame relay and leased - lines, are used to interconnect Local Area Networks (LANs) over vast - distances with data transfer rates significantly higher than those - achievable with commonly used asynchronous modem connections. - Usually, a quite expensive external device called a `WAN router' is - needed to connect to a WAN. - - As an alternative, WAN routing can be built into the Linux kernel. - With relatively inexpensive WAN interface cards available on the - market, a perfectly usable router can be built for less than half - the price of an external router. If you have one of those cards and - wish to use your Linux box as a WAN router, say Y here and also to - the WAN driver for your card, below. You will then need the - wan-tools package which is available from . - - To compile WAN routing support as a module, choose M here: the - module will be called wanrouter. - - If unsure, say N. diff --git a/net/wanrouter/Makefile b/net/wanrouter/Makefile deleted file mode 100644 index 4da14bc48078..000000000000 --- a/net/wanrouter/Makefile +++ /dev/null @@ -1,7 +0,0 @@ -# -# Makefile for the Linux WAN router layer. -# - -obj-$(CONFIG_WAN_ROUTER) += wanrouter.o - -wanrouter-y := wanproc.o wanmain.o diff --git a/net/wanrouter/patchlevel b/net/wanrouter/patchlevel deleted file mode 100644 index c043eea7767e..000000000000 --- a/net/wanrouter/patchlevel +++ /dev/null @@ -1 +0,0 @@ -2.2.1 diff --git a/net/wanrouter/wanmain.c b/net/wanrouter/wanmain.c deleted file mode 100644 index 2ab785064b7e..000000000000 --- a/net/wanrouter/wanmain.c +++ /dev/null @@ -1,782 +0,0 @@ -/***************************************************************************** -* wanmain.c WAN Multiprotocol Router Module. Main code. -* -* This module is completely hardware-independent and provides -* the following common services for the WAN Link Drivers: -* o WAN device management (registering, unregistering) -* o Network interface management -* o Physical connection management (dial-up, incoming calls) -* o Logical connection management (switched virtual circuits) -* o Protocol encapsulation/decapsulation -* -* Author: Gideon Hack -* -* Copyright: (c) 1995-1999 Sangoma Technologies Inc. -* -* This program is free software; you can redistribute it and/or -* modify it under the terms of the GNU General Public License -* as published by the Free Software Foundation; either version -* 2 of the License, or (at your option) any later version. -* ============================================================================ -* Nov 24, 2000 Nenad Corbic Updated for 2.4.X kernels -* Nov 07, 2000 Nenad Corbic Fixed the Mulit-Port PPP for kernels 2.2.16 and -* greater. -* Aug 2, 2000 Nenad Corbic Block the Multi-Port PPP from running on -* kernels 2.2.16 or greater. The SyncPPP -* has changed. -* Jul 13, 2000 Nenad Corbic Added SyncPPP support -* Added extra debugging in device_setup(). -* Oct 01, 1999 Gideon Hack Update for s514 PCI card -* Dec 27, 1996 Gene Kozin Initial version (based on Sangoma's WANPIPE) -* Jan 16, 1997 Gene Kozin router_devlist made public -* Jan 31, 1997 Alan Cox Hacked it about a bit for 2.1 -* Jun 27, 1997 Alan Cox realigned with vendor code -* Oct 15, 1997 Farhan Thawar changed wan_encapsulate to add a pad byte of 0 -* Apr 20, 1998 Alan Cox Fixed 2.1 symbols -* May 17, 1998 K. Baranowski Fixed SNAP encapsulation in wan_encapsulate -* Dec 15, 1998 Arnaldo Melo support for firmwares of up to 128000 bytes -* check wandev->setup return value -* Dec 22, 1998 Arnaldo Melo vmalloc/vfree used in device_setup to allocate -* kernel memory and copy configuration data to -* kernel space (for big firmwares) -* Jun 02, 1999 Gideon Hack Updates for Linux 2.0.X and 2.2.X kernels. -*****************************************************************************/ - -#include /* offsetof(), etc. */ -#include -#include /* return codes */ -#include -#include /* support for loadable modules */ -#include /* kmalloc(), kfree() */ -#include -#include -#include /* inline mem*, str* functions */ - -#include /* htons(), etc. */ -#include /* WAN router API definitions */ - -#include /* vmalloc, vfree */ -#include /* copy_to/from_user */ -#include /* __initfunc et al. */ - -#define DEV_TO_SLAVE(dev) (*((struct net_device **)netdev_priv(dev))) - -/* - * Function Prototypes - */ - -/* - * WAN device IOCTL handlers - */ - -static DEFINE_MUTEX(wanrouter_mutex); -static int wanrouter_device_setup(struct wan_device *wandev, - wandev_conf_t __user *u_conf); -static int wanrouter_device_stat(struct wan_device *wandev, - wandev_stat_t __user *u_stat); -static int wanrouter_device_shutdown(struct wan_device *wandev); -static int wanrouter_device_new_if(struct wan_device *wandev, - wanif_conf_t __user *u_conf); -static int wanrouter_device_del_if(struct wan_device *wandev, - char __user *u_name); - -/* - * Miscellaneous - */ - -static struct wan_device *wanrouter_find_device(char *name); -static int wanrouter_delete_interface(struct wan_device *wandev, char *name); -static void lock_adapter_irq(spinlock_t *lock, unsigned long *smp_flags) - __acquires(lock); -static void unlock_adapter_irq(spinlock_t *lock, unsigned long *smp_flags) - __releases(lock); - - - -/* - * Global Data - */ - -static char wanrouter_fullname[] = "Sangoma WANPIPE Router"; -static char wanrouter_copyright[] = "(c) 1995-2000 Sangoma Technologies Inc."; -static char wanrouter_modname[] = ROUTER_NAME; /* short module name */ -struct wan_device* wanrouter_router_devlist; /* list of registered devices */ - -/* - * Organize Unique Identifiers for encapsulation/decapsulation - */ - -#if 0 -static unsigned char wanrouter_oui_ether[] = { 0x00, 0x00, 0x00 }; -static unsigned char wanrouter_oui_802_2[] = { 0x00, 0x80, 0xC2 }; -#endif - -static int __init wanrouter_init(void) -{ - int err; - - printk(KERN_INFO "%s v%u.%u %s\n", - wanrouter_fullname, ROUTER_VERSION, ROUTER_RELEASE, - wanrouter_copyright); - - err = wanrouter_proc_init(); - if (err) - printk(KERN_INFO "%s: can't create entry in proc filesystem!\n", - wanrouter_modname); - - return err; -} - -static void __exit wanrouter_cleanup (void) -{ - wanrouter_proc_cleanup(); -} - -/* - * This is just plain dumb. We should move the bugger to drivers/net/wan, - * slap it first in directory and make it module_init(). The only reason - * for subsys_initcall() here is that net goes after drivers (why, BTW?) - */ -subsys_initcall(wanrouter_init); -module_exit(wanrouter_cleanup); - -/* - * Kernel APIs - */ - -/* - * Register WAN device. - * o verify device credentials - * o create an entry for the device in the /proc/net/router directory - * o initialize internally maintained fields of the wan_device structure - * o link device data space to a singly-linked list - * o if it's the first device, then start kernel 'thread' - * o increment module use count - * - * Return: - * 0 Ok - * < 0 error. - * - * Context: process - */ - - -int register_wan_device(struct wan_device *wandev) -{ - int err, namelen; - - if ((wandev == NULL) || (wandev->magic != ROUTER_MAGIC) || - (wandev->name == NULL)) - return -EINVAL; - - namelen = strlen(wandev->name); - if (!namelen || (namelen > WAN_DRVNAME_SZ)) - return -EINVAL; - - if (wanrouter_find_device(wandev->name)) - return -EEXIST; - -#ifdef WANDEBUG - printk(KERN_INFO "%s: registering WAN device %s\n", - wanrouter_modname, wandev->name); -#endif - - /* - * Register /proc directory entry - */ - err = wanrouter_proc_add(wandev); - if (err) { - printk(KERN_INFO - "%s: can't create /proc/net/router/%s entry!\n", - wanrouter_modname, wandev->name); - return err; - } - - /* - * Initialize fields of the wan_device structure maintained by the - * router and update local data. - */ - - wandev->ndev = 0; - wandev->dev = NULL; - wandev->next = wanrouter_router_devlist; - wanrouter_router_devlist = wandev; - return 0; -} - -/* - * Unregister WAN device. - * o shut down device - * o unlink device data space from the linked list - * o delete device entry in the /proc/net/router directory - * o decrement module use count - * - * Return: 0 Ok - * <0 error. - * Context: process - */ - - -int unregister_wan_device(char *name) -{ - struct wan_device *wandev, *prev; - - if (name == NULL) - return -EINVAL; - - for (wandev = wanrouter_router_devlist, prev = NULL; - wandev && strcmp(wandev->name, name); - prev = wandev, wandev = wandev->next) - ; - if (wandev == NULL) - return -ENODEV; - -#ifdef WANDEBUG - printk(KERN_INFO "%s: unregistering WAN device %s\n", - wanrouter_modname, name); -#endif - - if (wandev->state != WAN_UNCONFIGURED) - wanrouter_device_shutdown(wandev); - - if (prev) - prev->next = wandev->next; - else - wanrouter_router_devlist = wandev->next; - - wanrouter_proc_delete(wandev); - return 0; -} - -#if 0 - -/* - * Encapsulate packet. - * - * Return: encapsulation header size - * < 0 - unsupported Ethertype - * - * Notes: - * 1. This function may be called on interrupt context. - */ - - -int wanrouter_encapsulate(struct sk_buff *skb, struct net_device *dev, - unsigned short type) -{ - int hdr_len = 0; - - switch (type) { - case ETH_P_IP: /* IP datagram encapsulation */ - hdr_len += 1; - skb_push(skb, 1); - skb->data[0] = NLPID_IP; - break; - - case ETH_P_IPX: /* SNAP encapsulation */ - case ETH_P_ARP: - hdr_len += 7; - skb_push(skb, 7); - skb->data[0] = 0; - skb->data[1] = NLPID_SNAP; - skb_copy_to_linear_data_offset(skb, 2, wanrouter_oui_ether, - sizeof(wanrouter_oui_ether)); - *((unsigned short*)&skb->data[5]) = htons(type); - break; - - default: /* Unknown packet type */ - printk(KERN_INFO - "%s: unsupported Ethertype 0x%04X on interface %s!\n", - wanrouter_modname, type, dev->name); - hdr_len = -EINVAL; - } - return hdr_len; -} - - -/* - * Decapsulate packet. - * - * Return: Ethertype (in network order) - * 0 unknown encapsulation - * - * Notes: - * 1. This function may be called on interrupt context. - */ - - -__be16 wanrouter_type_trans(struct sk_buff *skb, struct net_device *dev) -{ - int cnt = skb->data[0] ? 0 : 1; /* there may be a pad present */ - __be16 ethertype; - - switch (skb->data[cnt]) { - case NLPID_IP: /* IP datagramm */ - ethertype = htons(ETH_P_IP); - cnt += 1; - break; - - case NLPID_SNAP: /* SNAP encapsulation */ - if (memcmp(&skb->data[cnt + 1], wanrouter_oui_ether, - sizeof(wanrouter_oui_ether))){ - printk(KERN_INFO - "%s: unsupported SNAP OUI %02X-%02X-%02X " - "on interface %s!\n", wanrouter_modname, - skb->data[cnt+1], skb->data[cnt+2], - skb->data[cnt+3], dev->name); - return 0; - } - ethertype = *((__be16*)&skb->data[cnt+4]); - cnt += 6; - break; - - /* add other protocols, e.g. CLNP, ESIS, ISIS, if needed */ - - default: - printk(KERN_INFO - "%s: unsupported NLPID 0x%02X on interface %s!\n", - wanrouter_modname, skb->data[cnt], dev->name); - return 0; - } - skb->protocol = ethertype; - skb->pkt_type = PACKET_HOST; /* Physically point to point */ - skb_pull(skb, cnt); - skb_reset_mac_header(skb); - return ethertype; -} - -#endif /* 0 */ - -/* - * WAN device IOCTL. - * o find WAN device associated with this node - * o execute requested action or pass command to the device driver - */ - -long wanrouter_ioctl(struct file *file, unsigned int cmd, unsigned long arg) -{ - struct inode *inode = file->f_path.dentry->d_inode; - int err = 0; - struct proc_dir_entry *dent; - struct wan_device *wandev; - void __user *data = (void __user *)arg; - - if (!capable(CAP_NET_ADMIN)) - return -EPERM; - - if ((cmd >> 8) != ROUTER_IOCTL) - return -EINVAL; - - dent = PDE(inode); - if ((dent == NULL) || (dent->data == NULL)) - return -EINVAL; - - wandev = dent->data; - if (wandev->magic != ROUTER_MAGIC) - return -EINVAL; - - mutex_lock(&wanrouter_mutex); - switch (cmd) { - case ROUTER_SETUP: - err = wanrouter_device_setup(wandev, data); - break; - - case ROUTER_DOWN: - err = wanrouter_device_shutdown(wandev); - break; - - case ROUTER_STAT: - err = wanrouter_device_stat(wandev, data); - break; - - case ROUTER_IFNEW: - err = wanrouter_device_new_if(wandev, data); - break; - - case ROUTER_IFDEL: - err = wanrouter_device_del_if(wandev, data); - break; - - case ROUTER_IFSTAT: - break; - - default: - if ((cmd >= ROUTER_USER) && - (cmd <= ROUTER_USER_MAX) && - wandev->ioctl) - err = wandev->ioctl(wandev, cmd, arg); - else err = -EINVAL; - } - mutex_unlock(&wanrouter_mutex); - return err; -} - -/* - * WAN Driver IOCTL Handlers - */ - -/* - * Setup WAN link device. - * o verify user address space - * o allocate kernel memory and copy configuration data to kernel space - * o if configuration data includes extension, copy it to kernel space too - * o call driver's setup() entry point - */ - -static int wanrouter_device_setup(struct wan_device *wandev, - wandev_conf_t __user *u_conf) -{ - void *data = NULL; - wandev_conf_t *conf; - int err = -EINVAL; - - if (wandev->setup == NULL) { /* Nothing to do ? */ - printk(KERN_INFO "%s: ERROR, No setup script: wandev->setup()\n", - wandev->name); - return 0; - } - - conf = kmalloc(sizeof(wandev_conf_t), GFP_KERNEL); - if (conf == NULL){ - printk(KERN_INFO "%s: ERROR, Failed to allocate kernel memory !\n", - wandev->name); - return -ENOBUFS; - } - - if (copy_from_user(conf, u_conf, sizeof(wandev_conf_t))) { - printk(KERN_INFO "%s: Failed to copy user config data to kernel space!\n", - wandev->name); - kfree(conf); - return -EFAULT; - } - - if (conf->magic != ROUTER_MAGIC) { - kfree(conf); - printk(KERN_INFO "%s: ERROR, Invalid MAGIC Number\n", - wandev->name); - return -EINVAL; - } - - if (conf->data_size && conf->data) { - if (conf->data_size > 128000) { - printk(KERN_INFO - "%s: ERROR, Invalid firmware data size %i !\n", - wandev->name, conf->data_size); - kfree(conf); - return -EINVAL; - } - - data = vmalloc(conf->data_size); - if (!data) { - printk(KERN_INFO - "%s: ERROR, Failed allocate kernel memory !\n", - wandev->name); - kfree(conf); - return -ENOBUFS; - } - if (!copy_from_user(data, conf->data, conf->data_size)) { - conf->data = data; - err = wandev->setup(wandev, conf); - } else { - printk(KERN_INFO - "%s: ERROR, Failed to copy from user data !\n", - wandev->name); - err = -EFAULT; - } - vfree(data); - } else { - printk(KERN_INFO - "%s: ERROR, No firmware found ! Firmware size = %i !\n", - wandev->name, conf->data_size); - } - - kfree(conf); - return err; -} - -/* - * Shutdown WAN device. - * o delete all not opened logical channels for this device - * o call driver's shutdown() entry point - */ - -static int wanrouter_device_shutdown(struct wan_device *wandev) -{ - struct net_device *dev; - int err=0; - - if (wandev->state == WAN_UNCONFIGURED) - return 0; - - printk(KERN_INFO "\n%s: Shutting Down!\n",wandev->name); - - for (dev = wandev->dev; dev;) { - err = wanrouter_delete_interface(wandev, dev->name); - if (err) - return err; - /* The above function deallocates the current dev - * structure. Therefore, we cannot use netdev_priv(dev) - * as the next element: wandev->dev points to the - * next element */ - dev = wandev->dev; - } - - if (wandev->ndev) - return -EBUSY; /* there are opened interfaces */ - - if (wandev->shutdown) - err=wandev->shutdown(wandev); - - return err; -} - -/* - * Get WAN device status & statistics. - */ - -static int wanrouter_device_stat(struct wan_device *wandev, - wandev_stat_t __user *u_stat) -{ - wandev_stat_t stat; - - memset(&stat, 0, sizeof(stat)); - - /* Ask device driver to update device statistics */ - if ((wandev->state != WAN_UNCONFIGURED) && wandev->update) - wandev->update(wandev); - - /* Fill out structure */ - stat.ndev = wandev->ndev; - stat.state = wandev->state; - - if (copy_to_user(u_stat, &stat, sizeof(stat))) - return -EFAULT; - - return 0; -} - -/* - * Create new WAN interface. - * o verify user address space - * o copy configuration data to kernel address space - * o allocate network interface data space - * o call driver's new_if() entry point - * o make sure there is no interface name conflict - * o register network interface - */ - -static int wanrouter_device_new_if(struct wan_device *wandev, - wanif_conf_t __user *u_conf) -{ - wanif_conf_t *cnf; - struct net_device *dev = NULL; - int err; - - if ((wandev->state == WAN_UNCONFIGURED) || (wandev->new_if == NULL)) - return -ENODEV; - - cnf = kmalloc(sizeof(wanif_conf_t), GFP_KERNEL); - if (!cnf) - return -ENOBUFS; - - err = -EFAULT; - if (copy_from_user(cnf, u_conf, sizeof(wanif_conf_t))) - goto out; - - err = -EINVAL; - if (cnf->magic != ROUTER_MAGIC) - goto out; - - if (cnf->config_id == WANCONFIG_MPPP) { - printk(KERN_INFO "%s: Wanpipe Mulit-Port PPP support has not been compiled in!\n", - wandev->name); - err = -EPROTONOSUPPORT; - goto out; - } else { - err = wandev->new_if(wandev, dev, cnf); - } - - if (!err) { - /* Register network interface. This will invoke init() - * function supplied by the driver. If device registered - * successfully, add it to the interface list. - */ - -#ifdef WANDEBUG - printk(KERN_INFO "%s: registering interface %s...\n", - wanrouter_modname, dev->name); -#endif - - err = register_netdev(dev); - if (!err) { - struct net_device *slave = NULL; - unsigned long smp_flags=0; - - lock_adapter_irq(&wandev->lock, &smp_flags); - - if (wandev->dev == NULL) { - wandev->dev = dev; - } else { - for (slave=wandev->dev; - DEV_TO_SLAVE(slave); - slave = DEV_TO_SLAVE(slave)) - DEV_TO_SLAVE(slave) = dev; - } - ++wandev->ndev; - - unlock_adapter_irq(&wandev->lock, &smp_flags); - err = 0; /* done !!! */ - goto out; - } - if (wandev->del_if) - wandev->del_if(wandev, dev); - free_netdev(dev); - } - -out: - kfree(cnf); - return err; -} - - -/* - * Delete WAN logical channel. - * o verify user address space - * o copy configuration data to kernel address space - */ - -static int wanrouter_device_del_if(struct wan_device *wandev, char __user *u_name) -{ - char name[WAN_IFNAME_SZ + 1]; - int err = 0; - - if (wandev->state == WAN_UNCONFIGURED) - return -ENODEV; - - memset(name, 0, sizeof(name)); - - if (copy_from_user(name, u_name, WAN_IFNAME_SZ)) - return -EFAULT; - - err = wanrouter_delete_interface(wandev, name); - if (err) - return err; - - /* If last interface being deleted, shutdown card - * This helps with administration at leaf nodes - * (You can tell if the person at the other end of the phone - * has an interface configured) and avoids DoS vulnerabilities - * in binary driver files - this fixes a problem with the current - * Sangoma driver going into strange states when all the network - * interfaces are deleted and the link irrecoverably disconnected. - */ - - if (!wandev->ndev && wandev->shutdown) - err = wandev->shutdown(wandev); - - return err; -} - -/* - * Miscellaneous Functions - */ - -/* - * Find WAN device by name. - * Return pointer to the WAN device data space or NULL if device not found. - */ - -static struct wan_device *wanrouter_find_device(char *name) -{ - struct wan_device *wandev; - - for (wandev = wanrouter_router_devlist; - wandev && strcmp(wandev->name, name); - wandev = wandev->next); - return wandev; -} - -/* - * Delete WAN logical channel identified by its name. - * o find logical channel by its name - * o call driver's del_if() entry point - * o unregister network interface - * o unlink channel data space from linked list of channels - * o release channel data space - * - * Return: 0 success - * -ENODEV channel not found. - * -EBUSY interface is open - * - * Note: If (force != 0), then device will be destroyed even if interface - * associated with it is open. It's caller's responsibility to make - * sure that opened interfaces are not removed! - */ - -static int wanrouter_delete_interface(struct wan_device *wandev, char *name) -{ - struct net_device *dev = NULL, *prev = NULL; - unsigned long smp_flags=0; - - lock_adapter_irq(&wandev->lock, &smp_flags); - dev = wandev->dev; - prev = NULL; - while (dev && strcmp(name, dev->name)) { - struct net_device **slave = netdev_priv(dev); - prev = dev; - dev = *slave; - } - unlock_adapter_irq(&wandev->lock, &smp_flags); - - if (dev == NULL) - return -ENODEV; /* interface not found */ - - if (netif_running(dev)) - return -EBUSY; /* interface in use */ - - if (wandev->del_if) - wandev->del_if(wandev, dev); - - lock_adapter_irq(&wandev->lock, &smp_flags); - if (prev) { - struct net_device **prev_slave = netdev_priv(prev); - struct net_device **slave = netdev_priv(dev); - - *prev_slave = *slave; - } else { - struct net_device **slave = netdev_priv(dev); - wandev->dev = *slave; - } - --wandev->ndev; - unlock_adapter_irq(&wandev->lock, &smp_flags); - - printk(KERN_INFO "%s: unregistering '%s'\n", wandev->name, dev->name); - - unregister_netdev(dev); - - free_netdev(dev); - - return 0; -} - -static void lock_adapter_irq(spinlock_t *lock, unsigned long *smp_flags) - __acquires(lock) -{ - spin_lock_irqsave(lock, *smp_flags); -} - - -static void unlock_adapter_irq(spinlock_t *lock, unsigned long *smp_flags) - __releases(lock) -{ - spin_unlock_irqrestore(lock, *smp_flags); -} - -EXPORT_SYMBOL(register_wan_device); -EXPORT_SYMBOL(unregister_wan_device); - -MODULE_LICENSE("GPL"); - -/* - * End - */ diff --git a/net/wanrouter/wanproc.c b/net/wanrouter/wanproc.c deleted file mode 100644 index c43612ee96bb..000000000000 --- a/net/wanrouter/wanproc.c +++ /dev/null @@ -1,380 +0,0 @@ -/***************************************************************************** -* wanproc.c WAN Router Module. /proc filesystem interface. -* -* This module is completely hardware-independent and provides -* access to the router using Linux /proc filesystem. -* -* Author: Gideon Hack -* -* Copyright: (c) 1995-1999 Sangoma Technologies Inc. -* -* This program is free software; you can redistribute it and/or -* modify it under the terms of the GNU General Public License -* as published by the Free Software Foundation; either version -* 2 of the License, or (at your option) any later version. -* ============================================================================ -* Jun 02, 1999 Gideon Hack Updates for Linux 2.2.X kernels. -* Jun 29, 1997 Alan Cox Merged with 1.0.3 vendor code -* Jan 29, 1997 Gene Kozin v1.0.1. Implemented /proc read routines -* Jan 30, 1997 Alan Cox Hacked around for 2.1 -* Dec 13, 1996 Gene Kozin Initial version (based on Sangoma's WANPIPE) -*****************************************************************************/ - -#include /* __initfunc et al. */ -#include /* offsetof(), etc. */ -#include /* return codes */ -#include -#include -#include /* WAN router API definitions */ -#include -#include - -#include -#include - -#define PROC_STATS_FORMAT "%30s: %12lu\n" - -/****** Defines and Macros **************************************************/ - -#define PROT_DECODE(prot) ((prot == WANCONFIG_FR) ? " FR" :\ - (prot == WANCONFIG_X25) ? " X25" : \ - (prot == WANCONFIG_PPP) ? " PPP" : \ - (prot == WANCONFIG_CHDLC) ? " CHDLC": \ - (prot == WANCONFIG_MPPP) ? " MPPP" : \ - " Unknown" ) - -/****** Function Prototypes *************************************************/ - -#ifdef CONFIG_PROC_FS - -/* Miscellaneous */ - -/* - * Structures for interfacing with the /proc filesystem. - * Router creates its own directory /proc/net/router with the following - * entries: - * config device configuration - * status global device statistics - * entry for each WAN device - */ - -/* - * Generic /proc/net/router/ file and inode operations - */ - -/* - * /proc/net/router - */ - -static DEFINE_MUTEX(config_mutex); -static struct proc_dir_entry *proc_router; - -/* Strings */ - -/* - * Interface functions - */ - -/****** Proc filesystem entry points ****************************************/ - -/* - * Iterator - */ -static void *r_start(struct seq_file *m, loff_t *pos) -{ - struct wan_device *wandev; - loff_t l = *pos; - - mutex_lock(&config_mutex); - if (!l--) - return SEQ_START_TOKEN; - for (wandev = wanrouter_router_devlist; l-- && wandev; - wandev = wandev->next) - ; - return wandev; -} - -static void *r_next(struct seq_file *m, void *v, loff_t *pos) -{ - struct wan_device *wandev = v; - (*pos)++; - return (v == SEQ_START_TOKEN) ? wanrouter_router_devlist : wandev->next; -} - -static void r_stop(struct seq_file *m, void *v) -{ - mutex_unlock(&config_mutex); -} - -static int config_show(struct seq_file *m, void *v) -{ - struct wan_device *p = v; - if (v == SEQ_START_TOKEN) { - seq_puts(m, "Device name | port |IRQ|DMA| mem.addr |" - "mem.size|option1|option2|option3|option4\n"); - return 0; - } - if (!p->state) - return 0; - seq_printf(m, "%-15s|0x%-4X|%3u|%3u| 0x%-8lX |0x%-6X|%7u|%7u|%7u|%7u\n", - p->name, p->ioport, p->irq, p->dma, p->maddr, p->msize, - p->hw_opt[0], p->hw_opt[1], p->hw_opt[2], p->hw_opt[3]); - return 0; -} - -static int status_show(struct seq_file *m, void *v) -{ - struct wan_device *p = v; - if (v == SEQ_START_TOKEN) { - seq_puts(m, "Device name |protocol|station|interface|" - "clocking|baud rate| MTU |ndev|link state\n"); - return 0; - } - if (!p->state) - return 0; - seq_printf(m, "%-15s|%-8s| %-7s| %-9s|%-8s|%9u|%5u|%3u |", - p->name, - PROT_DECODE(p->config_id), - p->config_id == WANCONFIG_FR ? - (p->station ? "Node" : "CPE") : - (p->config_id == WANCONFIG_X25 ? - (p->station ? "DCE" : "DTE") : - ("N/A")), - p->interface ? "V.35" : "RS-232", - p->clocking ? "internal" : "external", - p->bps, - p->mtu, - p->ndev); - - switch (p->state) { - case WAN_UNCONFIGURED: - seq_printf(m, "%-12s\n", "unconfigured"); - break; - case WAN_DISCONNECTED: - seq_printf(m, "%-12s\n", "disconnected"); - break; - case WAN_CONNECTING: - seq_printf(m, "%-12s\n", "connecting"); - break; - case WAN_CONNECTED: - seq_printf(m, "%-12s\n", "connected"); - break; - default: - seq_printf(m, "%-12s\n", "invalid"); - break; - } - return 0; -} - -static const struct seq_operations config_op = { - .start = r_start, - .next = r_next, - .stop = r_stop, - .show = config_show, -}; - -static const struct seq_operations status_op = { - .start = r_start, - .next = r_next, - .stop = r_stop, - .show = status_show, -}; - -static int config_open(struct inode *inode, struct file *file) -{ - return seq_open(file, &config_op); -} - -static int status_open(struct inode *inode, struct file *file) -{ - return seq_open(file, &status_op); -} - -static const struct file_operations config_fops = { - .owner = THIS_MODULE, - .open = config_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; - -static const struct file_operations status_fops = { - .owner = THIS_MODULE, - .open = status_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; - -static int wandev_show(struct seq_file *m, void *v) -{ - struct wan_device *wandev = m->private; - - if (wandev->magic != ROUTER_MAGIC) - return 0; - - if (!wandev->state) { - seq_puts(m, "device is not configured!\n"); - return 0; - } - - /* Update device statistics */ - if (wandev->update) { - int err = wandev->update(wandev); - if (err == -EAGAIN) { - seq_puts(m, "Device is busy!\n"); - return 0; - } - if (err) { - seq_puts(m, "Device is not configured!\n"); - return 0; - } - } - - seq_printf(m, PROC_STATS_FORMAT, - "total packets received", wandev->stats.rx_packets); - seq_printf(m, PROC_STATS_FORMAT, - "total packets transmitted", wandev->stats.tx_packets); - seq_printf(m, PROC_STATS_FORMAT, - "total bytes received", wandev->stats.rx_bytes); - seq_printf(m, PROC_STATS_FORMAT, - "total bytes transmitted", wandev->stats.tx_bytes); - seq_printf(m, PROC_STATS_FORMAT, - "bad packets received", wandev->stats.rx_errors); - seq_printf(m, PROC_STATS_FORMAT, - "packet transmit problems", wandev->stats.tx_errors); - seq_printf(m, PROC_STATS_FORMAT, - "received frames dropped", wandev->stats.rx_dropped); - seq_printf(m, PROC_STATS_FORMAT, - "transmit frames dropped", wandev->stats.tx_dropped); - seq_printf(m, PROC_STATS_FORMAT, - "multicast packets received", wandev->stats.multicast); - seq_printf(m, PROC_STATS_FORMAT, - "transmit collisions", wandev->stats.collisions); - seq_printf(m, PROC_STATS_FORMAT, - "receive length errors", wandev->stats.rx_length_errors); - seq_printf(m, PROC_STATS_FORMAT, - "receiver overrun errors", wandev->stats.rx_over_errors); - seq_printf(m, PROC_STATS_FORMAT, - "CRC errors", wandev->stats.rx_crc_errors); - seq_printf(m, PROC_STATS_FORMAT, - "frame format errors (aborts)", wandev->stats.rx_frame_errors); - seq_printf(m, PROC_STATS_FORMAT, - "receiver fifo overrun", wandev->stats.rx_fifo_errors); - seq_printf(m, PROC_STATS_FORMAT, - "receiver missed packet", wandev->stats.rx_missed_errors); - seq_printf(m, PROC_STATS_FORMAT, - "aborted frames transmitted", wandev->stats.tx_aborted_errors); - return 0; -} - -static int wandev_open(struct inode *inode, struct file *file) -{ - return single_open(file, wandev_show, PDE(inode)->data); -} - -static const struct file_operations wandev_fops = { - .owner = THIS_MODULE, - .open = wandev_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, - .unlocked_ioctl = wanrouter_ioctl, -}; - -/* - * Initialize router proc interface. - */ - -int __init wanrouter_proc_init(void) -{ - struct proc_dir_entry *p; - proc_router = proc_mkdir(ROUTER_NAME, init_net.proc_net); - if (!proc_router) - goto fail; - - p = proc_create("config", S_IRUGO, proc_router, &config_fops); - if (!p) - goto fail_config; - p = proc_create("status", S_IRUGO, proc_router, &status_fops); - if (!p) - goto fail_stat; - return 0; -fail_stat: - remove_proc_entry("config", proc_router); -fail_config: - remove_proc_entry(ROUTER_NAME, init_net.proc_net); -fail: - return -ENOMEM; -} - -/* - * Clean up router proc interface. - */ - -void wanrouter_proc_cleanup(void) -{ - remove_proc_entry("config", proc_router); - remove_proc_entry("status", proc_router); - remove_proc_entry(ROUTER_NAME, init_net.proc_net); -} - -/* - * Add directory entry for WAN device. - */ - -int wanrouter_proc_add(struct wan_device* wandev) -{ - if (wandev->magic != ROUTER_MAGIC) - return -EINVAL; - - wandev->dent = proc_create(wandev->name, S_IRUGO, - proc_router, &wandev_fops); - if (!wandev->dent) - return -ENOMEM; - wandev->dent->data = wandev; - return 0; -} - -/* - * Delete directory entry for WAN device. - */ -int wanrouter_proc_delete(struct wan_device* wandev) -{ - if (wandev->magic != ROUTER_MAGIC) - return -EINVAL; - remove_proc_entry(wandev->name, proc_router); - return 0; -} - -#else - -/* - * No /proc - output stubs - */ - -int __init wanrouter_proc_init(void) -{ - return 0; -} - -void wanrouter_proc_cleanup(void) -{ -} - -int wanrouter_proc_add(struct wan_device *wandev) -{ - return 0; -} - -int wanrouter_proc_delete(struct wan_device *wandev) -{ - return 0; -} - -#endif - -/* - * End - */ - -- cgit v1.2.3 From 0415d291022543d83ee799e9ffee08d856bca6e8 Mon Sep 17 00:00:00 2001 From: Enke Chen Date: Mon, 4 Feb 2013 16:14:32 +0100 Subject: fuse: send poll events commit 626cf23660 "poll: add poll_requested_events()..." enabled us to send the requested events to the filesystem. Signed-off-by: Miklos Szeredi --- fs/fuse/file.c | 1 + include/uapi/linux/fuse.h | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/fs/fuse/file.c b/fs/fuse/file.c index a010585b0a74..c8071768b950 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -2167,6 +2167,7 @@ unsigned fuse_file_poll(struct file *file, poll_table *wait) return DEFAULT_POLLMASK; poll_wait(file, &ff->poll_wait, wait); + inarg.events = (__u32)poll_requested_events(wait); /* * Ask for notification iff there's someone waiting for it. diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h index 3451b6061e69..68619e9210b9 100644 --- a/include/uapi/linux/fuse.h +++ b/include/uapi/linux/fuse.h @@ -63,6 +63,7 @@ * * 7.21 * - add FUSE_READDIRPLUS + * - send the requested events in POLL request */ #ifndef _LINUX_FUSE_H @@ -585,7 +586,7 @@ struct fuse_poll_in { __u64 fh; __u64 kh; __u32 flags; - __u32 padding; + __u32 events; }; struct fuse_poll_out { -- cgit v1.2.3 From 42745e039312ab4672c60ec584651f0c74e8264f Mon Sep 17 00:00:00 2001 From: Vladimir Kondratiev Date: Mon, 4 Feb 2013 13:53:11 +0200 Subject: cfg80211: expand per-station byte counters to 64bit In per-station statistics, present 32bit counters are too small for practical purposes - with gigabit speeds, it get overlapped every few seconds. Expand counters in the struct station_info to be 64-bit. Driver can still fill only 32-bit and indicate in @filled only bits like STATION_INFO_[TR]X_BYTES; in case driver provides full 64-bit counter, it should also set in @filled bit STATION_INFO_[TR]RX_BYTES64 Netlink sends both 32-bit and 64-bit counters, if present, to not break userspace. Signed-off-by: Vladimir Kondratiev [change to also have 32-bit counters if driver advertises 64-bit] Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 12 ++++++++---- include/uapi/linux/nl80211.h | 4 ++++ net/wireless/nl80211.c | 16 +++++++++++++--- 3 files changed, 25 insertions(+), 7 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 63599ab6005b..94a0810ef68e 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -672,8 +672,10 @@ struct station_parameters { * @STATION_INFO_SIGNAL: @signal filled * @STATION_INFO_TX_BITRATE: @txrate fields are filled * (tx_bitrate, tx_bitrate_flags and tx_bitrate_mcs) - * @STATION_INFO_RX_PACKETS: @rx_packets filled - * @STATION_INFO_TX_PACKETS: @tx_packets filled + * @STATION_INFO_RX_PACKETS: @rx_packets filled with 32-bit value + * @STATION_INFO_TX_PACKETS: @tx_packets filled with 32-bit value + * @STATION_INFO_RX_PACKETS64: @rx_packets filled with 64-bit value + * @STATION_INFO_TX_PACKETS64: @tx_packets filled with 64-bit value * @STATION_INFO_TX_RETRIES: @tx_retries filled * @STATION_INFO_TX_FAILED: @tx_failed filled * @STATION_INFO_RX_DROP_MISC: @rx_dropped_misc filled @@ -714,6 +716,8 @@ enum station_info_flags { STATION_INFO_LOCAL_PM = 1<<21, STATION_INFO_PEER_PM = 1<<22, STATION_INFO_NONPEER_PM = 1<<23, + STATION_INFO_RX_BYTES64 = 1<<24, + STATION_INFO_TX_BYTES64 = 1<<25, }; /** @@ -835,8 +839,8 @@ struct station_info { u32 filled; u32 connected_time; u32 inactive_time; - u32 rx_bytes; - u32 tx_bytes; + u64 rx_bytes; + u64 tx_bytes; u16 llid; u16 plid; u8 plink_state; diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 225a65e72219..9a2ecdc4136c 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -1857,6 +1857,8 @@ enum nl80211_sta_bss_param { * @NL80211_STA_INFO_INACTIVE_TIME: time since last activity (u32, msecs) * @NL80211_STA_INFO_RX_BYTES: total received bytes (u32, from this station) * @NL80211_STA_INFO_TX_BYTES: total transmitted bytes (u32, to this station) + * @NL80211_STA_INFO_RX_BYTES64: total received bytes (u64, from this station) + * @NL80211_STA_INFO_TX_BYTES64: total transmitted bytes (u64, to this station) * @NL80211_STA_INFO_SIGNAL: signal strength of last received PPDU (u8, dBm) * @NL80211_STA_INFO_TX_BITRATE: current unicast tx rate, nested attribute * containing info as possible, see &enum nl80211_rate_info @@ -1909,6 +1911,8 @@ enum nl80211_sta_info { NL80211_STA_INFO_LOCAL_PM, NL80211_STA_INFO_PEER_PM, NL80211_STA_INFO_NONPEER_PM, + NL80211_STA_INFO_RX_BYTES64, + NL80211_STA_INFO_TX_BYTES64, /* keep last */ __NL80211_STA_INFO_AFTER_LAST, diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index d359734b6972..807d448e702e 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -3057,12 +3057,22 @@ static int nl80211_send_station(struct sk_buff *msg, u32 portid, u32 seq, nla_put_u32(msg, NL80211_STA_INFO_INACTIVE_TIME, sinfo->inactive_time)) goto nla_put_failure; - if ((sinfo->filled & STATION_INFO_RX_BYTES) && + if ((sinfo->filled & (STATION_INFO_RX_BYTES | + STATION_INFO_RX_BYTES64)) && nla_put_u32(msg, NL80211_STA_INFO_RX_BYTES, - sinfo->rx_bytes)) + (u32)sinfo->rx_bytes)) goto nla_put_failure; - if ((sinfo->filled & STATION_INFO_TX_BYTES) && + if ((sinfo->filled & (STATION_INFO_TX_BYTES | + NL80211_STA_INFO_TX_BYTES64)) && nla_put_u32(msg, NL80211_STA_INFO_TX_BYTES, + (u32)sinfo->tx_bytes)) + goto nla_put_failure; + if ((sinfo->filled & STATION_INFO_RX_BYTES64) && + nla_put_u64(msg, NL80211_STA_INFO_RX_BYTES64, + sinfo->rx_bytes)) + goto nla_put_failure; + if ((sinfo->filled & STATION_INFO_TX_BYTES64) && + nla_put_u64(msg, NL80211_STA_INFO_TX_BYTES64, sinfo->tx_bytes)) goto nla_put_failure; if ((sinfo->filled & STATION_INFO_LLID) && -- cgit v1.2.3 From 85f024401bf80746ae08b7fd5809a9b16accf0b1 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Tue, 29 Jan 2013 17:54:44 -0800 Subject: serial_core: Fix type definition for PORT_BRCM_TRUMANAGE. It was mistakenly defined to be 24 instead of the next higher number 25. Reported-by: Alexander Shishkin Cc: Stephen Hurd Signed-off-by: Michael Chan Cc: stable # 3.8 Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/serial_core.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/serial_core.h b/include/uapi/linux/serial_core.h index 08464ef2c72c..b6a23a483d74 100644 --- a/include/uapi/linux/serial_core.h +++ b/include/uapi/linux/serial_core.h @@ -50,7 +50,7 @@ #define PORT_LPC3220 22 /* NXP LPC32xx SoC "Standard" UART */ #define PORT_8250_CIR 23 /* CIR infrared port, has its own driver */ #define PORT_XR17V35X 24 /* Exar XR17V35x UARTs */ -#define PORT_BRCM_TRUMANAGE 24 +#define PORT_BRCM_TRUMANAGE 25 #define PORT_MAX_8250 25 /* max port ID */ /* -- cgit v1.2.3 From d1beadd1cb649404bfa2c3d92f77dbcb15b712e5 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Mon, 28 Jan 2013 10:44:48 +0000 Subject: netfilter: xt_conntrack: Add flag to support aliases The patch adds the flag to denote the "state" alias as of the subset of the "conntrack" match. Signed-off-by: Jozsef Kadlecsik Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/xt_conntrack.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netfilter/xt_conntrack.h b/include/uapi/linux/netfilter/xt_conntrack.h index e3c041d54020..e5bd3083a843 100644 --- a/include/uapi/linux/netfilter/xt_conntrack.h +++ b/include/uapi/linux/netfilter/xt_conntrack.h @@ -31,6 +31,7 @@ enum { XT_CONNTRACK_REPLSRC_PORT = 1 << 10, XT_CONNTRACK_REPLDST_PORT = 1 << 11, XT_CONNTRACK_DIRECTION = 1 << 12, + XT_CONNTRACK_STATE_ALIAS = 1 << 13, }; struct xt_conntrack_mtinfo1 { -- cgit v1.2.3 From 5474f57f7d686ac918355419cb71496f835aaf5d Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 30 Jan 2013 20:24:22 +0100 Subject: netfilter: xt_CT: add alias flag This patch adds the alias flag to support full NOTRACK target aliasing. Based on initial patch from Jozsef Kadlecsik. Acked-by: Jozsef Kadlecsik Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/xt_CT.h | 6 +++++- net/netfilter/xt_CT.c | 32 +++++++++++++++++++++++++++++--- 2 files changed, 34 insertions(+), 4 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netfilter/xt_CT.h b/include/uapi/linux/netfilter/xt_CT.h index a064b8af360c..5a688c1ca4d7 100644 --- a/include/uapi/linux/netfilter/xt_CT.h +++ b/include/uapi/linux/netfilter/xt_CT.h @@ -3,7 +3,11 @@ #include -#define XT_CT_NOTRACK 0x1 +enum { + XT_CT_NOTRACK = 1 << 0, + XT_CT_NOTRACK_ALIAS = 1 << 1, + XT_CT_MASK = XT_CT_NOTRACK | XT_CT_NOTRACK_ALIAS, +}; struct xt_ct_target_info { __u16 flags; diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c index d69f1c7532f7..a60261cb0e80 100644 --- a/net/netfilter/xt_CT.c +++ b/net/netfilter/xt_CT.c @@ -185,9 +185,6 @@ static int xt_ct_tg_check(const struct xt_tgchk_param *par, struct nf_conn *ct; int ret = -EOPNOTSUPP; - if (info->flags & ~XT_CT_NOTRACK) - return -EINVAL; - if (info->flags & XT_CT_NOTRACK) { ct = nf_ct_untracked_get(); atomic_inc(&ct->ct_general.use); @@ -256,6 +253,9 @@ static int xt_ct_tg_check_v0(const struct xt_tgchk_param *par) }; int ret; + if (info->flags & ~XT_CT_NOTRACK) + return -EINVAL; + memcpy(info_v1.helper, info->helper, sizeof(info->helper)); ret = xt_ct_tg_check(par, &info_v1); @@ -269,6 +269,21 @@ static int xt_ct_tg_check_v0(const struct xt_tgchk_param *par) static int xt_ct_tg_check_v1(const struct xt_tgchk_param *par) { + struct xt_ct_target_info_v1 *info = par->targinfo; + + if (info->flags & ~XT_CT_NOTRACK) + return -EINVAL; + + return xt_ct_tg_check(par, par->targinfo); +} + +static int xt_ct_tg_check_v2(const struct xt_tgchk_param *par) +{ + struct xt_ct_target_info_v1 *info = par->targinfo; + + if (info->flags & ~XT_CT_MASK) + return -EINVAL; + return xt_ct_tg_check(par, par->targinfo); } @@ -350,6 +365,17 @@ static struct xt_target xt_ct_tg_reg[] __read_mostly = { .table = "raw", .me = THIS_MODULE, }, + { + .name = "CT", + .family = NFPROTO_UNSPEC, + .revision = 2, + .targetsize = sizeof(struct xt_ct_target_info_v1), + .checkentry = xt_ct_tg_check_v2, + .destroy = xt_ct_tg_destroy_v1, + .target = xt_ct_target_v1, + .table = "raw", + .me = THIS_MODULE, + }, }; static unsigned int -- cgit v1.2.3 From 2ccbe779bcdee130ea7f1525670dc9d60318a981 Mon Sep 17 00:00:00 2001 From: Sylwester Nawrocki Date: Sat, 19 Jan 2013 15:51:55 -0300 Subject: [media] v4l2-ctrl: Add helper function for the controls range update This patch adds a helper function that allows to modify range, i.e. minimum, maximum, step and default value of a v4l2 control, after the control has been created and initialized. This is helpful in situations when range of a control depends on user configurable parameters, e.g. camera sensor absolute exposure time depending on an output image resolution and frame rate. v4l2_ctrl_modify_range() function allows to modify range of an INTEGER, BOOL, MENU, INTEGER_MENU and BITMASK type controls. Based on a patch from Hans Verkuil http://patchwork.linuxtv.org/patch/8654. Signed-off-by: Sylwester Nawrocki Acked-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- Documentation/DocBook/media/v4l/compat.xml | 4 + Documentation/DocBook/media/v4l/v4l2.xml | 4 +- Documentation/DocBook/media/v4l/vidioc-dqevent.xml | 6 + drivers/media/v4l2-core/v4l2-ctrls.c | 143 +++++++++++++++------ include/media/v4l2-ctrls.h | 20 +++ include/uapi/linux/videodev2.h | 1 + 6 files changed, 138 insertions(+), 40 deletions(-) (limited to 'include/uapi/linux') diff --git a/Documentation/DocBook/media/v4l/compat.xml b/Documentation/DocBook/media/v4l/compat.xml index ebd2bfd1ee8e..104a1a2b8849 100644 --- a/Documentation/DocBook/media/v4l/compat.xml +++ b/Documentation/DocBook/media/v4l/compat.xml @@ -2486,6 +2486,10 @@ that used it. It was originally scheduled for removal in 2.6.35. v4l2_buffer. See . + + Added V4L2_EVENT_CTRL_CH_RANGE control event + changes flag. See . +
diff --git a/Documentation/DocBook/media/v4l/v4l2.xml b/Documentation/DocBook/media/v4l/v4l2.xml index 8fe29427c8e4..c3851a2fb50d 100644 --- a/Documentation/DocBook/media/v4l/v4l2.xml +++ b/Documentation/DocBook/media/v4l/v4l2.xml @@ -142,10 +142,12 @@ applications. --> 3.9 2012-12-03 - sa + sa, sn Added timestamp types to v4l2_buffer, see . + Added V4L2_EVENT_CTRL_CH_RANGE control + event changes flag, see . diff --git a/Documentation/DocBook/media/v4l/vidioc-dqevent.xml b/Documentation/DocBook/media/v4l/vidioc-dqevent.xml index 98a856f9ec30..89891adb928a 100644 --- a/Documentation/DocBook/media/v4l/vidioc-dqevent.xml +++ b/Documentation/DocBook/media/v4l/vidioc-dqevent.xml @@ -261,6 +261,12 @@ This control event was triggered because the control flags changed. + + V4L2_EVENT_CTRL_CH_RANGE + 0x0004 + This control event was triggered because the minimum, + maximum, step or the default value of the control changed. + diff --git a/drivers/media/v4l2-core/v4l2-ctrls.c b/drivers/media/v4l2-core/v4l2-ctrls.c index 7b486ac3f4d9..3f27571b814d 100644 --- a/drivers/media/v4l2-core/v4l2-ctrls.c +++ b/drivers/media/v4l2-core/v4l2-ctrls.c @@ -1158,8 +1158,7 @@ static int new_to_user(struct v4l2_ext_control *c, } /* Copy the new value to the current value. */ -static void new_to_cur(struct v4l2_fh *fh, struct v4l2_ctrl *ctrl, - bool update_inactive) +static void new_to_cur(struct v4l2_fh *fh, struct v4l2_ctrl *ctrl, u32 ch_flags) { bool changed = false; @@ -1183,8 +1182,8 @@ static void new_to_cur(struct v4l2_fh *fh, struct v4l2_ctrl *ctrl, ctrl->cur.val = ctrl->val; break; } - if (update_inactive) { - /* Note: update_inactive can only be true for auto clusters. */ + if (ch_flags & V4L2_EVENT_CTRL_CH_FLAGS) { + /* Note: CH_FLAGS is only set for auto clusters. */ ctrl->flags &= ~(V4L2_CTRL_FLAG_INACTIVE | V4L2_CTRL_FLAG_VOLATILE); if (!is_cur_manual(ctrl->cluster[0])) { @@ -1194,14 +1193,13 @@ static void new_to_cur(struct v4l2_fh *fh, struct v4l2_ctrl *ctrl, } fh = NULL; } - if (changed || update_inactive) { + if (changed || ch_flags) { /* If a control was changed that was not one of the controls modified by the application, then send the event to all. */ if (!ctrl->is_new) fh = NULL; send_event(fh, ctrl, - (changed ? V4L2_EVENT_CTRL_CH_VALUE : 0) | - (update_inactive ? V4L2_EVENT_CTRL_CH_FLAGS : 0)); + (changed ? V4L2_EVENT_CTRL_CH_VALUE : 0) | ch_flags); if (ctrl->call_notify && changed && ctrl->handler->notify) ctrl->handler->notify(ctrl, ctrl->handler->notify_priv); } @@ -1257,6 +1255,41 @@ static int cluster_changed(struct v4l2_ctrl *master) return diff; } +/* Control range checking */ +static int check_range(enum v4l2_ctrl_type type, + s32 min, s32 max, u32 step, s32 def) +{ + switch (type) { + case V4L2_CTRL_TYPE_BOOLEAN: + if (step != 1 || max > 1 || min < 0) + return -ERANGE; + /* fall through */ + case V4L2_CTRL_TYPE_INTEGER: + if (step <= 0 || min > max || def < min || def > max) + return -ERANGE; + return 0; + case V4L2_CTRL_TYPE_BITMASK: + if (step || min || !max || (def & ~max)) + return -ERANGE; + return 0; + case V4L2_CTRL_TYPE_MENU: + case V4L2_CTRL_TYPE_INTEGER_MENU: + if (min > max || def < min || def > max) + return -ERANGE; + /* Note: step == menu_skip_mask for menu controls. + So here we check if the default value is masked out. */ + if (step && ((1 << def) & step)) + return -EINVAL; + return 0; + case V4L2_CTRL_TYPE_STRING: + if (min > max || min < 0 || step < 1 || def) + return -ERANGE; + return 0; + default: + return 0; + } +} + /* Validate a new control */ static int validate_new(const struct v4l2_ctrl *ctrl, struct v4l2_ext_control *c) @@ -1529,30 +1562,21 @@ static struct v4l2_ctrl *v4l2_ctrl_new(struct v4l2_ctrl_handler *hdl, { struct v4l2_ctrl *ctrl; unsigned sz_extra = 0; + int err; if (hdl->error) return NULL; /* Sanity checks */ if (id == 0 || name == NULL || id >= V4L2_CID_PRIVATE_BASE || - (type == V4L2_CTRL_TYPE_INTEGER && step == 0) || - (type == V4L2_CTRL_TYPE_BITMASK && max == 0) || (type == V4L2_CTRL_TYPE_MENU && qmenu == NULL) || - (type == V4L2_CTRL_TYPE_INTEGER_MENU && qmenu_int == NULL) || - (type == V4L2_CTRL_TYPE_STRING && max == 0)) { - handler_set_err(hdl, -ERANGE); - return NULL; - } - if (type != V4L2_CTRL_TYPE_BITMASK && max < min) { + (type == V4L2_CTRL_TYPE_INTEGER_MENU && qmenu_int == NULL)) { handler_set_err(hdl, -ERANGE); return NULL; } - if ((type == V4L2_CTRL_TYPE_INTEGER || - type == V4L2_CTRL_TYPE_MENU || - type == V4L2_CTRL_TYPE_INTEGER_MENU || - type == V4L2_CTRL_TYPE_BOOLEAN) && - (def < min || def > max)) { - handler_set_err(hdl, -ERANGE); + err = check_range(type, min, max, step, def); + if (err) { + handler_set_err(hdl, err); return NULL; } if (type == V4L2_CTRL_TYPE_BITMASK && ((def & ~max) || min || step)) { @@ -2426,8 +2450,8 @@ EXPORT_SYMBOL(v4l2_ctrl_g_ctrl_int64); /* Core function that calls try/s_ctrl and ensures that the new value is copied to the current value on a set. Must be called with ctrl->handler->lock held. */ -static int try_or_set_cluster(struct v4l2_fh *fh, - struct v4l2_ctrl *master, bool set) +static int try_or_set_cluster(struct v4l2_fh *fh, struct v4l2_ctrl *master, + bool set, u32 ch_flags) { bool update_flag; int ret; @@ -2465,7 +2489,8 @@ static int try_or_set_cluster(struct v4l2_fh *fh, /* If OK, then make the new values permanent. */ update_flag = is_cur_manual(master) != is_new_manual(master); for (i = 0; i < master->ncontrols; i++) - new_to_cur(fh, master->cluster[i], update_flag && i > 0); + new_to_cur(fh, master->cluster[i], ch_flags | + ((update_flag && i > 0) ? V4L2_EVENT_CTRL_CH_FLAGS : 0)); return 0; } @@ -2592,7 +2617,7 @@ static int try_set_ext_ctrls(struct v4l2_fh *fh, struct v4l2_ctrl_handler *hdl, } while (!ret && idx); if (!ret) - ret = try_or_set_cluster(fh, master, set); + ret = try_or_set_cluster(fh, master, set, 0); /* Copy the new values back to userspace. */ if (!ret) { @@ -2638,10 +2663,9 @@ EXPORT_SYMBOL(v4l2_subdev_s_ext_ctrls); /* Helper function for VIDIOC_S_CTRL compatibility */ static int set_ctrl(struct v4l2_fh *fh, struct v4l2_ctrl *ctrl, - struct v4l2_ext_control *c) + struct v4l2_ext_control *c, u32 ch_flags) { struct v4l2_ctrl *master = ctrl->cluster[0]; - int ret; int i; /* String controls are not supported. The user_to_new() and @@ -2651,12 +2675,6 @@ static int set_ctrl(struct v4l2_fh *fh, struct v4l2_ctrl *ctrl, if (ctrl->type == V4L2_CTRL_TYPE_STRING) return -EINVAL; - ret = validate_new(ctrl, c); - if (ret) - return ret; - - v4l2_ctrl_lock(ctrl); - /* Reset the 'is_new' flags of the cluster */ for (i = 0; i < master->ncontrols; i++) if (master->cluster[i]) @@ -2670,10 +2688,22 @@ static int set_ctrl(struct v4l2_fh *fh, struct v4l2_ctrl *ctrl, update_from_auto_cluster(master); user_to_new(c, ctrl); - ret = try_or_set_cluster(fh, master, true); - cur_to_user(c, ctrl); + return try_or_set_cluster(fh, master, true, ch_flags); +} - v4l2_ctrl_unlock(ctrl); +/* Helper function for VIDIOC_S_CTRL compatibility */ +static int set_ctrl_lock(struct v4l2_fh *fh, struct v4l2_ctrl *ctrl, + struct v4l2_ext_control *c) +{ + int ret = validate_new(ctrl, c); + + if (!ret) { + v4l2_ctrl_lock(ctrl); + ret = set_ctrl(fh, ctrl, c, 0); + if (!ret) + cur_to_user(c, ctrl); + v4l2_ctrl_unlock(ctrl); + } return ret; } @@ -2691,7 +2721,7 @@ int v4l2_s_ctrl(struct v4l2_fh *fh, struct v4l2_ctrl_handler *hdl, return -EACCES; c.value = control->value; - ret = set_ctrl(fh, ctrl, &c); + ret = set_ctrl_lock(fh, ctrl, &c); control->value = c.value; return ret; } @@ -2710,7 +2740,7 @@ int v4l2_ctrl_s_ctrl(struct v4l2_ctrl *ctrl, s32 val) /* It's a driver bug if this happens. */ WARN_ON(!type_is_int(ctrl)); c.value = val; - return set_ctrl(NULL, ctrl, &c); + return set_ctrl_lock(NULL, ctrl, &c); } EXPORT_SYMBOL(v4l2_ctrl_s_ctrl); @@ -2721,7 +2751,7 @@ int v4l2_ctrl_s_ctrl_int64(struct v4l2_ctrl *ctrl, s64 val) /* It's a driver bug if this happens. */ WARN_ON(ctrl->type != V4L2_CTRL_TYPE_INTEGER64); c.value64 = val; - return set_ctrl(NULL, ctrl, &c); + return set_ctrl_lock(NULL, ctrl, &c); } EXPORT_SYMBOL(v4l2_ctrl_s_ctrl_int64); @@ -2741,6 +2771,41 @@ void v4l2_ctrl_notify(struct v4l2_ctrl *ctrl, v4l2_ctrl_notify_fnc notify, void } EXPORT_SYMBOL(v4l2_ctrl_notify); +int v4l2_ctrl_modify_range(struct v4l2_ctrl *ctrl, + s32 min, s32 max, u32 step, s32 def) +{ + int ret = check_range(ctrl->type, min, max, step, def); + struct v4l2_ext_control c; + + switch (ctrl->type) { + case V4L2_CTRL_TYPE_INTEGER: + case V4L2_CTRL_TYPE_BOOLEAN: + case V4L2_CTRL_TYPE_MENU: + case V4L2_CTRL_TYPE_INTEGER_MENU: + case V4L2_CTRL_TYPE_BITMASK: + if (ret) + return ret; + break; + default: + return -EINVAL; + } + v4l2_ctrl_lock(ctrl); + ctrl->minimum = min; + ctrl->maximum = max; + ctrl->step = step; + ctrl->default_value = def; + c.value = ctrl->cur.val; + if (validate_new(ctrl, &c)) + c.value = def; + if (c.value != ctrl->cur.val) + ret = set_ctrl(NULL, ctrl, &c, V4L2_EVENT_CTRL_CH_RANGE); + else + send_event(NULL, ctrl, V4L2_EVENT_CTRL_CH_RANGE); + v4l2_ctrl_unlock(ctrl); + return ret; +} +EXPORT_SYMBOL(v4l2_ctrl_modify_range); + static int v4l2_ctrl_add_event(struct v4l2_subscribed_event *sev, unsigned elems) { struct v4l2_ctrl *ctrl = v4l2_ctrl_find(sev->fh->ctrl_handler, sev->id); diff --git a/include/media/v4l2-ctrls.h b/include/media/v4l2-ctrls.h index c4cc04136074..91125b6f05a5 100644 --- a/include/media/v4l2-ctrls.h +++ b/include/media/v4l2-ctrls.h @@ -518,6 +518,26 @@ void v4l2_ctrl_activate(struct v4l2_ctrl *ctrl, bool active); */ void v4l2_ctrl_grab(struct v4l2_ctrl *ctrl, bool grabbed); +/** v4l2_ctrl_modify_range() - Update the range of a control. + * @ctrl: The control to update. + * @min: The control's minimum value. + * @max: The control's maximum value. + * @step: The control's step value + * @def: The control's default value. + * + * Update the range of a control on the fly. This works for control types + * INTEGER, BOOLEAN, MENU, INTEGER MENU and BITMASK. For menu controls the + * @step value is interpreted as a menu_skip_mask. + * + * An error is returned if one of the range arguments is invalid for this + * control type. + * + * This function assumes that the control handler is not locked and will + * take the lock itself. + */ +int v4l2_ctrl_modify_range(struct v4l2_ctrl *ctrl, + s32 min, s32 max, u32 step, s32 def); + /** v4l2_ctrl_lock() - Helper function to lock the handler * associated with the control. * @ctrl: The control to lock. diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h index 94cbe26e9f00..928799c2e2d9 100644 --- a/include/uapi/linux/videodev2.h +++ b/include/uapi/linux/videodev2.h @@ -1822,6 +1822,7 @@ struct v4l2_event_vsync { /* Payload for V4L2_EVENT_CTRL */ #define V4L2_EVENT_CTRL_CH_VALUE (1 << 0) #define V4L2_EVENT_CTRL_CH_FLAGS (1 << 1) +#define V4L2_EVENT_CTRL_CH_RANGE (1 << 2) struct v4l2_event_ctrl { __u32 changes; -- cgit v1.2.3 From 28718152e0a78085297ec7705f53869e41d1ae73 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Thu, 24 Jan 2013 04:42:05 -0300 Subject: [media] Move DV-class control IDs from videodev2.h to v4l2-controls.h When the control IDs were split off from videodev2.h to v4l2-controls.h these new Digital Video controls were forgotten (the two patches may have crossed one another). Move these controls to their proper place in v4l2-controls.h. Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/v4l2-controls.h | 24 ++++++++++++++++++++++++ include/uapi/linux/videodev2.h | 22 ---------------------- 2 files changed, 24 insertions(+), 22 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/v4l2-controls.h b/include/uapi/linux/v4l2-controls.h index 4dc0822700fe..0bece06792d7 100644 --- a/include/uapi/linux/v4l2-controls.h +++ b/include/uapi/linux/v4l2-controls.h @@ -778,6 +778,7 @@ enum v4l2_jpeg_chroma_subsampling { #define V4L2_JPEG_ACTIVE_MARKER_DQT (1 << 17) #define V4L2_JPEG_ACTIVE_MARKER_DHT (1 << 18) + /* Image source controls */ #define V4L2_CID_IMAGE_SOURCE_CLASS_BASE (V4L2_CTRL_CLASS_IMAGE_SOURCE | 0x900) #define V4L2_CID_IMAGE_SOURCE_CLASS (V4L2_CTRL_CLASS_IMAGE_SOURCE | 1) @@ -796,4 +797,27 @@ enum v4l2_jpeg_chroma_subsampling { #define V4L2_CID_PIXEL_RATE (V4L2_CID_IMAGE_PROC_CLASS_BASE + 2) #define V4L2_CID_TEST_PATTERN (V4L2_CID_IMAGE_PROC_CLASS_BASE + 3) + +/* DV-class control IDs defined by V4L2 */ +#define V4L2_CID_DV_CLASS_BASE (V4L2_CTRL_CLASS_DV | 0x900) +#define V4L2_CID_DV_CLASS (V4L2_CTRL_CLASS_DV | 1) + +#define V4L2_CID_DV_TX_HOTPLUG (V4L2_CID_DV_CLASS_BASE + 1) +#define V4L2_CID_DV_TX_RXSENSE (V4L2_CID_DV_CLASS_BASE + 2) +#define V4L2_CID_DV_TX_EDID_PRESENT (V4L2_CID_DV_CLASS_BASE + 3) +#define V4L2_CID_DV_TX_MODE (V4L2_CID_DV_CLASS_BASE + 4) +enum v4l2_dv_tx_mode { + V4L2_DV_TX_MODE_DVI_D = 0, + V4L2_DV_TX_MODE_HDMI = 1, +}; +#define V4L2_CID_DV_TX_RGB_RANGE (V4L2_CID_DV_CLASS_BASE + 5) +enum v4l2_dv_rgb_range { + V4L2_DV_RGB_RANGE_AUTO = 0, + V4L2_DV_RGB_RANGE_LIMITED = 1, + V4L2_DV_RGB_RANGE_FULL = 2, +}; + +#define V4L2_CID_DV_RX_POWER_PRESENT (V4L2_CID_DV_CLASS_BASE + 100) +#define V4L2_CID_DV_RX_RGB_RANGE (V4L2_CID_DV_CLASS_BASE + 101) + #endif diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h index 928799c2e2d9..234d1d870914 100644 --- a/include/uapi/linux/videodev2.h +++ b/include/uapi/linux/videodev2.h @@ -1354,28 +1354,6 @@ struct v4l2_querymenu { #define V4L2_CID_PRIVATE_BASE 0x08000000 -/* DV-class control IDs defined by V4L2 */ -#define V4L2_CID_DV_CLASS_BASE (V4L2_CTRL_CLASS_DV | 0x900) -#define V4L2_CID_DV_CLASS (V4L2_CTRL_CLASS_DV | 1) - -#define V4L2_CID_DV_TX_HOTPLUG (V4L2_CID_DV_CLASS_BASE + 1) -#define V4L2_CID_DV_TX_RXSENSE (V4L2_CID_DV_CLASS_BASE + 2) -#define V4L2_CID_DV_TX_EDID_PRESENT (V4L2_CID_DV_CLASS_BASE + 3) -#define V4L2_CID_DV_TX_MODE (V4L2_CID_DV_CLASS_BASE + 4) -enum v4l2_dv_tx_mode { - V4L2_DV_TX_MODE_DVI_D = 0, - V4L2_DV_TX_MODE_HDMI = 1, -}; -#define V4L2_CID_DV_TX_RGB_RANGE (V4L2_CID_DV_CLASS_BASE + 5) -enum v4l2_dv_rgb_range { - V4L2_DV_RGB_RANGE_AUTO = 0, - V4L2_DV_RGB_RANGE_LIMITED = 1, - V4L2_DV_RGB_RANGE_FULL = 2, -}; - -#define V4L2_CID_DV_RX_POWER_PRESENT (V4L2_CID_DV_CLASS_BASE + 100) -#define V4L2_CID_DV_RX_RGB_RANGE (V4L2_CID_DV_CLASS_BASE + 101) - /* * T U N I N G */ -- cgit v1.2.3 From ed986d1fee77bbbb62291a1db1c7edbb00d99515 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Tue, 29 Jan 2013 07:21:02 -0300 Subject: [media] meye: convert to the control framework Convert the meye driver to the control framework. Some private controls have been replaced with standardized controls (SHARPNESS and JPEGQUAL). The AGC control looks like it can be replaced by the AUTOGAIN control, but it isn't a boolean so I do not know how to interpret it. The FRAMERATE control looks like it can be replaced by S_PARM, but again, without knowing how to interpret it I decided to leave it alone. Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- drivers/media/pci/meye/meye.c | 278 ++++++++++++------------------------- drivers/media/pci/meye/meye.h | 2 + include/uapi/linux/meye.h | 8 +- include/uapi/linux/v4l2-controls.h | 5 + 4 files changed, 99 insertions(+), 194 deletions(-) (limited to 'include/uapi/linux') diff --git a/drivers/media/pci/meye/meye.c b/drivers/media/pci/meye/meye.c index 3b39deaa8f6b..7859c43479d7 100644 --- a/drivers/media/pci/meye/meye.c +++ b/drivers/media/pci/meye/meye.c @@ -35,6 +35,8 @@ #include #include #include +#include +#include #include #include #include @@ -865,7 +867,7 @@ static int meye_open(struct file *file) meye.grab_buffer[i].state = MEYE_BUF_UNUSED; kfifo_reset(&meye.grabq); kfifo_reset(&meye.doneq); - return 0; + return v4l2_fh_open(file); } static int meye_release(struct file *file) @@ -873,7 +875,7 @@ static int meye_release(struct file *file) mchip_hic_stop(); mchip_dma_free(); clear_bit(0, &meye.in_use); - return 0; + return v4l2_fh_release(file); } static int meyeioc_g_params(struct meye_params *p) @@ -1032,8 +1034,9 @@ static int vidioc_querycap(struct file *file, void *fh, cap->version = (MEYE_DRIVER_MAJORVERSION << 8) + MEYE_DRIVER_MINORVERSION; - cap->capabilities = V4L2_CAP_VIDEO_CAPTURE | + cap->device_caps = V4L2_CAP_VIDEO_CAPTURE | V4L2_CAP_STREAMING; + cap->capabilities = cap->device_caps | V4L2_CAP_DEVICE_CAPS; return 0; } @@ -1063,191 +1066,50 @@ static int vidioc_s_input(struct file *file, void *fh, unsigned int i) return 0; } -static int vidioc_queryctrl(struct file *file, void *fh, - struct v4l2_queryctrl *c) -{ - switch (c->id) { - - case V4L2_CID_BRIGHTNESS: - c->type = V4L2_CTRL_TYPE_INTEGER; - strcpy(c->name, "Brightness"); - c->minimum = 0; - c->maximum = 63; - c->step = 1; - c->default_value = 32; - c->flags = 0; - break; - case V4L2_CID_HUE: - c->type = V4L2_CTRL_TYPE_INTEGER; - strcpy(c->name, "Hue"); - c->minimum = 0; - c->maximum = 63; - c->step = 1; - c->default_value = 32; - c->flags = 0; - break; - case V4L2_CID_CONTRAST: - c->type = V4L2_CTRL_TYPE_INTEGER; - strcpy(c->name, "Contrast"); - c->minimum = 0; - c->maximum = 63; - c->step = 1; - c->default_value = 32; - c->flags = 0; - break; - case V4L2_CID_SATURATION: - c->type = V4L2_CTRL_TYPE_INTEGER; - strcpy(c->name, "Saturation"); - c->minimum = 0; - c->maximum = 63; - c->step = 1; - c->default_value = 32; - c->flags = 0; - break; - case V4L2_CID_AGC: - c->type = V4L2_CTRL_TYPE_INTEGER; - strcpy(c->name, "Agc"); - c->minimum = 0; - c->maximum = 63; - c->step = 1; - c->default_value = 48; - c->flags = 0; - break; - case V4L2_CID_MEYE_SHARPNESS: - case V4L2_CID_SHARPNESS: - c->type = V4L2_CTRL_TYPE_INTEGER; - strcpy(c->name, "Sharpness"); - c->minimum = 0; - c->maximum = 63; - c->step = 1; - c->default_value = 32; - - /* Continue to report legacy private SHARPNESS ctrl but - * say it is disabled in preference to ctrl in the spec - */ - c->flags = (c->id == V4L2_CID_SHARPNESS) ? 0 : - V4L2_CTRL_FLAG_DISABLED; - break; - case V4L2_CID_PICTURE: - c->type = V4L2_CTRL_TYPE_INTEGER; - strcpy(c->name, "Picture"); - c->minimum = 0; - c->maximum = 63; - c->step = 1; - c->default_value = 0; - c->flags = 0; - break; - case V4L2_CID_JPEGQUAL: - c->type = V4L2_CTRL_TYPE_INTEGER; - strcpy(c->name, "JPEG quality"); - c->minimum = 0; - c->maximum = 10; - c->step = 1; - c->default_value = 8; - c->flags = 0; - break; - case V4L2_CID_FRAMERATE: - c->type = V4L2_CTRL_TYPE_INTEGER; - strcpy(c->name, "Framerate"); - c->minimum = 0; - c->maximum = 31; - c->step = 1; - c->default_value = 0; - c->flags = 0; - break; - default: - return -EINVAL; - } - - return 0; -} - -static int vidioc_s_ctrl(struct file *file, void *fh, struct v4l2_control *c) +static int meye_s_ctrl(struct v4l2_ctrl *ctrl) { mutex_lock(&meye.lock); - switch (c->id) { + switch (ctrl->id) { case V4L2_CID_BRIGHTNESS: sony_pic_camera_command( - SONY_PIC_COMMAND_SETCAMERABRIGHTNESS, c->value); - meye.brightness = c->value << 10; + SONY_PIC_COMMAND_SETCAMERABRIGHTNESS, ctrl->val); + meye.brightness = ctrl->val << 10; break; case V4L2_CID_HUE: sony_pic_camera_command( - SONY_PIC_COMMAND_SETCAMERAHUE, c->value); - meye.hue = c->value << 10; + SONY_PIC_COMMAND_SETCAMERAHUE, ctrl->val); + meye.hue = ctrl->val << 10; break; case V4L2_CID_CONTRAST: sony_pic_camera_command( - SONY_PIC_COMMAND_SETCAMERACONTRAST, c->value); - meye.contrast = c->value << 10; + SONY_PIC_COMMAND_SETCAMERACONTRAST, ctrl->val); + meye.contrast = ctrl->val << 10; break; case V4L2_CID_SATURATION: sony_pic_camera_command( - SONY_PIC_COMMAND_SETCAMERACOLOR, c->value); - meye.colour = c->value << 10; + SONY_PIC_COMMAND_SETCAMERACOLOR, ctrl->val); + meye.colour = ctrl->val << 10; break; - case V4L2_CID_AGC: + case V4L2_CID_MEYE_AGC: sony_pic_camera_command( - SONY_PIC_COMMAND_SETCAMERAAGC, c->value); - meye.params.agc = c->value; + SONY_PIC_COMMAND_SETCAMERAAGC, ctrl->val); + meye.params.agc = ctrl->val; break; case V4L2_CID_SHARPNESS: - case V4L2_CID_MEYE_SHARPNESS: sony_pic_camera_command( - SONY_PIC_COMMAND_SETCAMERASHARPNESS, c->value); - meye.params.sharpness = c->value; + SONY_PIC_COMMAND_SETCAMERASHARPNESS, ctrl->val); + meye.params.sharpness = ctrl->val; break; - case V4L2_CID_PICTURE: + case V4L2_CID_MEYE_PICTURE: sony_pic_camera_command( - SONY_PIC_COMMAND_SETCAMERAPICTURE, c->value); - meye.params.picture = c->value; + SONY_PIC_COMMAND_SETCAMERAPICTURE, ctrl->val); + meye.params.picture = ctrl->val; break; - case V4L2_CID_JPEGQUAL: - meye.params.quality = c->value; + case V4L2_CID_JPEG_COMPRESSION_QUALITY: + meye.params.quality = ctrl->val; break; - case V4L2_CID_FRAMERATE: - meye.params.framerate = c->value; - break; - default: - mutex_unlock(&meye.lock); - return -EINVAL; - } - mutex_unlock(&meye.lock); - - return 0; -} - -static int vidioc_g_ctrl(struct file *file, void *fh, struct v4l2_control *c) -{ - mutex_lock(&meye.lock); - switch (c->id) { - case V4L2_CID_BRIGHTNESS: - c->value = meye.brightness >> 10; - break; - case V4L2_CID_HUE: - c->value = meye.hue >> 10; - break; - case V4L2_CID_CONTRAST: - c->value = meye.contrast >> 10; - break; - case V4L2_CID_SATURATION: - c->value = meye.colour >> 10; - break; - case V4L2_CID_AGC: - c->value = meye.params.agc; - break; - case V4L2_CID_SHARPNESS: - case V4L2_CID_MEYE_SHARPNESS: - c->value = meye.params.sharpness; - break; - case V4L2_CID_PICTURE: - c->value = meye.params.picture; - break; - case V4L2_CID_JPEGQUAL: - c->value = meye.params.quality; - break; - case V4L2_CID_FRAMERATE: - c->value = meye.params.framerate; + case V4L2_CID_MEYE_FRAMERATE: + meye.params.framerate = ctrl->val; break; default: mutex_unlock(&meye.lock); @@ -1577,12 +1439,12 @@ static long vidioc_default(struct file *file, void *fh, bool valid_prio, static unsigned int meye_poll(struct file *file, poll_table *wait) { - unsigned int res = 0; + unsigned int res = v4l2_ctrl_poll(file, wait); mutex_lock(&meye.lock); poll_wait(file, &meye.proc_list, wait); if (kfifo_len(&meye.doneq)) - res = POLLIN | POLLRDNORM; + res |= POLLIN | POLLRDNORM; mutex_unlock(&meye.lock); return res; } @@ -1669,9 +1531,6 @@ static const struct v4l2_ioctl_ops meye_ioctl_ops = { .vidioc_enum_input = vidioc_enum_input, .vidioc_g_input = vidioc_g_input, .vidioc_s_input = vidioc_s_input, - .vidioc_queryctrl = vidioc_queryctrl, - .vidioc_s_ctrl = vidioc_s_ctrl, - .vidioc_g_ctrl = vidioc_g_ctrl, .vidioc_enum_fmt_vid_cap = vidioc_enum_fmt_vid_cap, .vidioc_try_fmt_vid_cap = vidioc_try_fmt_vid_cap, .vidioc_g_fmt_vid_cap = vidioc_g_fmt_vid_cap, @@ -1682,6 +1541,9 @@ static const struct v4l2_ioctl_ops meye_ioctl_ops = { .vidioc_dqbuf = vidioc_dqbuf, .vidioc_streamon = vidioc_streamon, .vidioc_streamoff = vidioc_streamoff, + .vidioc_log_status = v4l2_ctrl_log_status, + .vidioc_subscribe_event = v4l2_ctrl_subscribe_event, + .vidioc_unsubscribe_event = v4l2_event_unsubscribe, .vidioc_default = vidioc_default, }; @@ -1692,6 +1554,10 @@ static struct video_device meye_template = { .release = video_device_release, }; +static const struct v4l2_ctrl_ops meye_ctrl_ops = { + .s_ctrl = meye_s_ctrl, +}; + #ifdef CONFIG_PM static int meye_suspend(struct pci_dev *pdev, pm_message_t state) { @@ -1730,6 +1596,32 @@ static int meye_resume(struct pci_dev *pdev) static int meye_probe(struct pci_dev *pcidev, const struct pci_device_id *ent) { + static const struct v4l2_ctrl_config ctrl_agc = { + .id = V4L2_CID_MEYE_AGC, + .type = V4L2_CTRL_TYPE_INTEGER, + .ops = &meye_ctrl_ops, + .name = "AGC", + .max = 63, + .step = 1, + .def = 48, + .flags = V4L2_CTRL_FLAG_SLIDER, + }; + static const struct v4l2_ctrl_config ctrl_picture = { + .id = V4L2_CID_MEYE_PICTURE, + .type = V4L2_CTRL_TYPE_INTEGER, + .ops = &meye_ctrl_ops, + .name = "Picture", + .max = 63, + .step = 1, + }; + static const struct v4l2_ctrl_config ctrl_framerate = { + .id = V4L2_CID_MEYE_FRAMERATE, + .type = V4L2_CTRL_TYPE_INTEGER, + .ops = &meye_ctrl_ops, + .name = "Framerate", + .max = 31, + .step = 1, + }; struct v4l2_device *v4l2_dev = &meye.v4l2_dev; int ret = -EBUSY; unsigned long mchip_adr; @@ -1833,24 +1725,31 @@ static int meye_probe(struct pci_dev *pcidev, const struct pci_device_id *ent) mutex_init(&meye.lock); init_waitqueue_head(&meye.proc_list); - meye.brightness = 32 << 10; - meye.hue = 32 << 10; - meye.colour = 32 << 10; - meye.contrast = 32 << 10; - meye.params.subsample = 0; - meye.params.quality = 8; - meye.params.sharpness = 32; - meye.params.agc = 48; - meye.params.picture = 0; - meye.params.framerate = 0; - - sony_pic_camera_command(SONY_PIC_COMMAND_SETCAMERABRIGHTNESS, 32); - sony_pic_camera_command(SONY_PIC_COMMAND_SETCAMERAHUE, 32); - sony_pic_camera_command(SONY_PIC_COMMAND_SETCAMERACOLOR, 32); - sony_pic_camera_command(SONY_PIC_COMMAND_SETCAMERACONTRAST, 32); - sony_pic_camera_command(SONY_PIC_COMMAND_SETCAMERASHARPNESS, 32); - sony_pic_camera_command(SONY_PIC_COMMAND_SETCAMERAPICTURE, 0); - sony_pic_camera_command(SONY_PIC_COMMAND_SETCAMERAAGC, 48); + + v4l2_ctrl_handler_init(&meye.hdl, 3); + v4l2_ctrl_new_std(&meye.hdl, &meye_ctrl_ops, + V4L2_CID_BRIGHTNESS, 0, 63, 1, 32); + v4l2_ctrl_new_std(&meye.hdl, &meye_ctrl_ops, + V4L2_CID_HUE, 0, 63, 1, 32); + v4l2_ctrl_new_std(&meye.hdl, &meye_ctrl_ops, + V4L2_CID_CONTRAST, 0, 63, 1, 32); + v4l2_ctrl_new_std(&meye.hdl, &meye_ctrl_ops, + V4L2_CID_SATURATION, 0, 63, 1, 32); + v4l2_ctrl_new_custom(&meye.hdl, &ctrl_agc, NULL); + v4l2_ctrl_new_std(&meye.hdl, &meye_ctrl_ops, + V4L2_CID_SHARPNESS, 0, 63, 1, 32); + v4l2_ctrl_new_custom(&meye.hdl, &ctrl_picture, NULL); + v4l2_ctrl_new_std(&meye.hdl, &meye_ctrl_ops, + V4L2_CID_JPEG_COMPRESSION_QUALITY, 0, 10, 1, 8); + v4l2_ctrl_new_custom(&meye.hdl, &ctrl_framerate, NULL); + if (meye.hdl.error) { + v4l2_err(v4l2_dev, "couldn't register controls\n"); + goto outvideoreg; + } + + v4l2_ctrl_handler_setup(&meye.hdl); + meye.vdev->ctrl_handler = &meye.hdl; + set_bit(V4L2_FL_USE_FH_PRIO, &meye.vdev->flags); if (video_register_device(meye.vdev, VFL_TYPE_GRABBER, video_nr) < 0) { @@ -1866,6 +1765,7 @@ static int meye_probe(struct pci_dev *pcidev, const struct pci_device_id *ent) return 0; outvideoreg: + v4l2_ctrl_handler_free(&meye.hdl); free_irq(meye.mchip_irq, meye_irq); outreqirq: iounmap(meye.mchip_mmregs); diff --git a/drivers/media/pci/meye/meye.h b/drivers/media/pci/meye/meye.h index 4bdeb03f1644..6fed9274cfa5 100644 --- a/drivers/media/pci/meye/meye.h +++ b/drivers/media/pci/meye/meye.h @@ -39,6 +39,7 @@ #include #include #include +#include /****************************************************************************/ /* Motion JPEG chip registers */ @@ -290,6 +291,7 @@ struct meye_grab_buffer { /* Motion Eye device structure */ struct meye { struct v4l2_device v4l2_dev; /* Main v4l2_device struct */ + struct v4l2_ctrl_handler hdl; struct pci_dev *mchip_dev; /* pci device */ u8 mchip_irq; /* irq */ u8 mchip_mode; /* actual mchip mode: HIC_MODE... */ diff --git a/include/uapi/linux/meye.h b/include/uapi/linux/meye.h index 0dd49954f746..8ff50fe9e481 100644 --- a/include/uapi/linux/meye.h +++ b/include/uapi/linux/meye.h @@ -57,10 +57,8 @@ struct meye_params { #define MEYEIOC_STILLJCAPT _IOR ('v', BASE_VIDIOC_PRIVATE+5, int) /* V4L2 private controls */ -#define V4L2_CID_AGC V4L2_CID_PRIVATE_BASE -#define V4L2_CID_MEYE_SHARPNESS (V4L2_CID_PRIVATE_BASE + 1) -#define V4L2_CID_PICTURE (V4L2_CID_PRIVATE_BASE + 2) -#define V4L2_CID_JPEGQUAL (V4L2_CID_PRIVATE_BASE + 3) -#define V4L2_CID_FRAMERATE (V4L2_CID_PRIVATE_BASE + 4) +#define V4L2_CID_MEYE_AGC (V4L2_CID_USER_MEYE_BASE + 0) +#define V4L2_CID_MEYE_PICTURE (V4L2_CID_USER_MEYE_BASE + 1) +#define V4L2_CID_MEYE_FRAMERATE (V4L2_CID_USER_MEYE_BASE + 2) #endif diff --git a/include/uapi/linux/v4l2-controls.h b/include/uapi/linux/v4l2-controls.h index 0bece06792d7..dcd63745e83a 100644 --- a/include/uapi/linux/v4l2-controls.h +++ b/include/uapi/linux/v4l2-controls.h @@ -140,6 +140,11 @@ enum v4l2_colorfx { /* last CID + 1 */ #define V4L2_CID_LASTP1 (V4L2_CID_BASE+43) +/* USER-class private control IDs */ + +/* The base for the meye driver controls. See linux/meye.h for the list + * of controls. We reserve 16 controls for this driver. */ +#define V4L2_CID_USER_MEYE_BASE (V4L2_CID_USER_BASE + 0x1000) /* MPEG-class control IDs */ -- cgit v1.2.3 From 7e98d53086d18c877cb44e9065219335184024de Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Thu, 7 Feb 2013 11:58:12 +0100 Subject: Synchronize fuse header with one used in library The library one has provisions for use in *BSD, add them to the kernel one too. They don't hurt and ease maintenance. Signed-off-by: Miklos Szeredi --- include/uapi/linux/fuse.h | 37 ++++++++++++++++++++++++++++++++++++- 1 file changed, 36 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h index 68619e9210b9..baee03e90438 100644 --- a/include/uapi/linux/fuse.h +++ b/include/uapi/linux/fuse.h @@ -1,9 +1,35 @@ /* - FUSE: Filesystem in Userspace + This file defines the kernel interface of FUSE Copyright (C) 2001-2008 Miklos Szeredi This program can be distributed under the terms of the GNU GPL. See the file COPYING. + + This -- and only this -- header file may also be distributed under + the terms of the BSD Licence as follows: + + Copyright (C) 2001-2007 Miklos Szeredi. All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND + ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + SUCH DAMAGE. */ /* @@ -69,7 +95,16 @@ #ifndef _LINUX_FUSE_H #define _LINUX_FUSE_H +#ifdef __linux__ #include +#else +#include +#define __u64 uint64_t +#define __s64 int64_t +#define __u32 uint32_t +#define __s32 int32_t +#define __u16 uint16_t +#endif /* * Version negotiation: -- cgit v1.2.3 From 634734b63ac39e137a1c623ba74f3e062b6577db Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Wed, 6 Feb 2013 22:29:01 +0000 Subject: fuse: allow control of adaptive readdirplus use For some filesystems (e.g. GlusterFS), the cost of performing a normal readdir and readdirplus are identical. Since adaptively using readdirplus has no benefit for those systems, give users/filesystems the option to control adaptive readdirplus use. v2 of this patch incorporates Miklos's suggestion to simplify the code, as well as improving consistency of macro names and documentation. Signed-off-by: Eric Wong Signed-off-by: Miklos Szeredi --- fs/fuse/dir.c | 2 ++ fs/fuse/fuse_i.h | 5 ++++- fs/fuse/inode.c | 4 +++- include/uapi/linux/fuse.h | 3 +++ 4 files changed, 12 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 2b112d978e9f..85065221a58a 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -21,6 +21,8 @@ static bool fuse_use_readdirplus(struct inode *dir, struct file *filp) if (!fc->do_readdirplus) return false; + if (!fc->readdirplus_auto) + return true; if (test_and_clear_bit(FUSE_I_ADVISE_RDPLUS, &fi->state)) return true; if (filp->f_pos == 0) diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index fc55dd33c1e2..6aeba864f070 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -514,9 +514,12 @@ struct fuse_conn { /** Use enhanced/automatic page cache invalidation. */ unsigned auto_inval_data:1; - /** Does the filesystem support readdir-plus? */ + /** Does the filesystem support readdirplus? */ unsigned do_readdirplus:1; + /** Does the filesystem want adaptive readdirplus? */ + unsigned readdirplus_auto:1; + /** The number of requests waiting for completion */ atomic_t num_waiting; diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 9876a87255fe..01353ed75750 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -866,6 +866,8 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req) fc->auto_inval_data = 1; if (arg->flags & FUSE_DO_READDIRPLUS) fc->do_readdirplus = 1; + if (arg->flags & FUSE_READDIRPLUS_AUTO) + fc->readdirplus_auto = 1; } else { ra_pages = fc->max_read / PAGE_CACHE_SIZE; fc->no_lock = 1; @@ -893,7 +895,7 @@ static void fuse_send_init(struct fuse_conn *fc, struct fuse_req *req) FUSE_EXPORT_SUPPORT | FUSE_BIG_WRITES | FUSE_DONT_MASK | FUSE_SPLICE_WRITE | FUSE_SPLICE_MOVE | FUSE_SPLICE_READ | FUSE_FLOCK_LOCKS | FUSE_IOCTL_DIR | FUSE_AUTO_INVAL_DATA | - FUSE_DO_READDIRPLUS; + FUSE_DO_READDIRPLUS | FUSE_READDIRPLUS_AUTO; req->in.h.opcode = FUSE_INIT; req->in.numargs = 1; req->in.args[0].size = sizeof(*arg); diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h index baee03e90438..4c43b4448792 100644 --- a/include/uapi/linux/fuse.h +++ b/include/uapi/linux/fuse.h @@ -218,6 +218,8 @@ struct fuse_file_lock { * FUSE_FLOCK_LOCKS: remote locking for BSD style file locks * FUSE_HAS_IOCTL_DIR: kernel supports ioctl on directories * FUSE_AUTO_INVAL_DATA: automatically invalidate cached pages + * FUSE_DO_READDIRPLUS: do READDIRPLUS (READDIR+LOOKUP in one) + * FUSE_READDIRPLUS_AUTO: adaptive readdirplus */ #define FUSE_ASYNC_READ (1 << 0) #define FUSE_POSIX_LOCKS (1 << 1) @@ -233,6 +235,7 @@ struct fuse_file_lock { #define FUSE_HAS_IOCTL_DIR (1 << 11) #define FUSE_AUTO_INVAL_DATA (1 << 12) #define FUSE_DO_READDIRPLUS (1 << 13) +#define FUSE_READDIRPLUS_AUTO (1 << 14) /** * CUSE INIT request/reply flags -- cgit v1.2.3 From 4f4ffc3a5398ef9bdbb32db04756d7d34e356fcf Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Mon, 4 Feb 2013 19:39:52 +0000 Subject: unbreak automounter support on 64-bit kernel with 32-bit userspace (v2) automount-support is broken on the parisc architecture, because the existing #if list does not include a check for defined(__hppa__). The HPPA (parisc) architecture is similiar to other 64bit Linux targets where we have to define autofs_wqt_t (which is passed back and forth to user space) as int type which has a size of 32bit across 32 and 64bit kernels. During the discussion on the mailing list, H. Peter Anvin suggested to invert the #if list since only specific platforms (specifically those who do not have a 32bit userspace, like IA64 and Alpha) should have autofs_wqt_t as unsigned long type. This suggestion is probably the best way to go, since Arm64 (and maybe others?) seems to have a non-working automounter. So in the long run even for other new upcoming architectures this inverted check seem to be the best solution, since it will not require them to change this #if again (unless they are 64bit only). Signed-off-by: Helge Deller Acked-by: H. Peter Anvin Acked-by: Ian Kent Acked-by: Catalin Marinas CC: James Bottomley CC: Rolf Eike Beer --- include/uapi/linux/auto_fs.h | 25 ++++++++----------------- 1 file changed, 8 insertions(+), 17 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/auto_fs.h b/include/uapi/linux/auto_fs.h index 77cdba9df274..bb991dfe134f 100644 --- a/include/uapi/linux/auto_fs.h +++ b/include/uapi/linux/auto_fs.h @@ -28,25 +28,16 @@ #define AUTOFS_MIN_PROTO_VERSION AUTOFS_PROTO_VERSION /* - * Architectures where both 32- and 64-bit binaries can be executed - * on 64-bit kernels need this. This keeps the structure format - * uniform, and makes sure the wait_queue_token isn't too big to be - * passed back down to the kernel. - * - * This assumes that on these architectures: - * mode 32 bit 64 bit - * ------------------------- - * int 32 bit 32 bit - * long 32 bit 64 bit - * - * If so, 32-bit user-space code should be backwards compatible. + * The wait_queue_token (autofs_wqt_t) is part of a structure which is passed + * back to the kernel via ioctl from userspace. On architectures where 32- and + * 64-bit userspace binaries can be executed it's important that the size of + * autofs_wqt_t stays constant between 32- and 64-bit Linux kernels so that we + * do not break the binary ABI interface by changing the structure size. */ - -#if defined(__sparc__) || defined(__mips__) || defined(__x86_64__) \ - || defined(__powerpc__) || defined(__s390__) -typedef unsigned int autofs_wqt_t; -#else +#if defined(__ia64__) || defined(__alpha__) /* pure 64bit architectures */ typedef unsigned long autofs_wqt_t; +#else +typedef unsigned int autofs_wqt_t; #endif /* Packet types */ -- cgit v1.2.3 From d021c344051af91f42c5ba9fdedc176740cbd238 Mon Sep 17 00:00:00 2001 From: Andy King Date: Wed, 6 Feb 2013 14:23:56 +0000 Subject: VSOCK: Introduce VM Sockets VM Sockets allows communication between virtual machines and the hypervisor. User level applications both in a virtual machine and on the host can use the VM Sockets API, which facilitates fast and efficient communication between guest virtual machines and their host. A socket address family, designed to be compatible with UDP and TCP at the interface level, is provided. Today, VM Sockets is used by various VMware Tools components inside the guest for zero-config, network-less access to VMware host services. In addition to this, VMware's users are using VM Sockets for various applications, where network access of the virtual machine is restricted or non-existent. Examples of this are VMs communicating with device proxies for proprietary hardware running as host applications and automated testing of applications running within virtual machines. The VMware VM Sockets are similar to other socket types, like Berkeley UNIX socket interface. The VM Sockets module supports both connection-oriented stream sockets like TCP, and connectionless datagram sockets like UDP. The VM Sockets protocol family is defined as "AF_VSOCK" and the socket operations split for SOCK_DGRAM and SOCK_STREAM. For additional information about the use of VM Sockets, please refer to the VM Sockets Programming Guide available at: https://www.vmware.com/support/developer/vmci-sdk/ Signed-off-by: George Zhang Signed-off-by: Dmitry Torokhov Signed-off-by: Andy king Signed-off-by: David S. Miller --- include/linux/socket.h | 4 +- include/uapi/linux/vm_sockets.h | 171 ++ net/Kconfig | 1 + net/Makefile | 1 + net/vmw_vsock/Kconfig | 28 + net/vmw_vsock/Makefile | 7 + net/vmw_vsock/af_vsock.c | 2015 ++++++++++++++++++++++++ net/vmw_vsock/af_vsock.h | 175 +++ net/vmw_vsock/vmci_transport.c | 2157 ++++++++++++++++++++++++++ net/vmw_vsock/vmci_transport.h | 139 ++ net/vmw_vsock/vmci_transport_notify.c | 680 ++++++++ net/vmw_vsock/vmci_transport_notify.h | 83 + net/vmw_vsock/vmci_transport_notify_qstate.c | 438 ++++++ net/vmw_vsock/vsock_addr.c | 86 + net/vmw_vsock/vsock_addr.h | 32 + net/vmw_vsock/vsock_version.h | 22 + 16 files changed, 6038 insertions(+), 1 deletion(-) create mode 100644 include/uapi/linux/vm_sockets.h create mode 100644 net/vmw_vsock/Kconfig create mode 100644 net/vmw_vsock/Makefile create mode 100644 net/vmw_vsock/af_vsock.c create mode 100644 net/vmw_vsock/af_vsock.h create mode 100644 net/vmw_vsock/vmci_transport.c create mode 100644 net/vmw_vsock/vmci_transport.h create mode 100644 net/vmw_vsock/vmci_transport_notify.c create mode 100644 net/vmw_vsock/vmci_transport_notify.h create mode 100644 net/vmw_vsock/vmci_transport_notify_qstate.c create mode 100644 net/vmw_vsock/vsock_addr.c create mode 100644 net/vmw_vsock/vsock_addr.h create mode 100644 net/vmw_vsock/vsock_version.h (limited to 'include/uapi/linux') diff --git a/include/linux/socket.h b/include/linux/socket.h index 9a546ff853dc..2b9f74b0ffea 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -178,7 +178,8 @@ struct ucred { #define AF_CAIF 37 /* CAIF sockets */ #define AF_ALG 38 /* Algorithm sockets */ #define AF_NFC 39 /* NFC sockets */ -#define AF_MAX 40 /* For now.. */ +#define AF_VSOCK 40 /* vSockets */ +#define AF_MAX 41 /* For now.. */ /* Protocol families, same as address families. */ #define PF_UNSPEC AF_UNSPEC @@ -221,6 +222,7 @@ struct ucred { #define PF_CAIF AF_CAIF #define PF_ALG AF_ALG #define PF_NFC AF_NFC +#define PF_VSOCK AF_VSOCK #define PF_MAX AF_MAX /* Maximum queue length specifiable by listen. */ diff --git a/include/uapi/linux/vm_sockets.h b/include/uapi/linux/vm_sockets.h new file mode 100644 index 000000000000..f7f2e99dec84 --- /dev/null +++ b/include/uapi/linux/vm_sockets.h @@ -0,0 +1,171 @@ +/* + * VMware vSockets Driver + * + * Copyright (C) 2007-2013 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#ifndef _VM_SOCKETS_H_ +#define _VM_SOCKETS_H_ + +#if !defined(__KERNEL__) +#include +#endif + +/* Option name for STREAM socket buffer size. Use as the option name in + * setsockopt(3) or getsockopt(3) to set or get an unsigned long long that + * specifies the size of the buffer underlying a vSockets STREAM socket. + * Value is clamped to the MIN and MAX. + */ + +#define SO_VM_SOCKETS_BUFFER_SIZE 0 + +/* Option name for STREAM socket minimum buffer size. Use as the option name + * in setsockopt(3) or getsockopt(3) to set or get an unsigned long long that + * specifies the minimum size allowed for the buffer underlying a vSockets + * STREAM socket. + */ + +#define SO_VM_SOCKETS_BUFFER_MIN_SIZE 1 + +/* Option name for STREAM socket maximum buffer size. Use as the option name + * in setsockopt(3) or getsockopt(3) to set or get an unsigned long long + * that specifies the maximum size allowed for the buffer underlying a + * vSockets STREAM socket. + */ + +#define SO_VM_SOCKETS_BUFFER_MAX_SIZE 2 + +/* Option name for socket peer's host-specific VM ID. Use as the option name + * in getsockopt(3) to get a host-specific identifier for the peer endpoint's + * VM. The identifier is a signed integer. + * Only available for hypervisor endpoints. + */ + +#define SO_VM_SOCKETS_PEER_HOST_VM_ID 3 + +/* Option name for socket's service label. Use as the option name in + * setsockopt(3) or getsockopt(3) to set or get the service label for a socket. + * The service label is a C-style NUL-terminated string. Only available for + * hypervisor endpoints. + */ + +#define SO_VM_SOCKETS_SERVICE_LABEL 4 + +/* Option name for determining if a socket is trusted. Use as the option name + * in getsockopt(3) to determine if a socket is trusted. The value is a + * signed integer. + */ + +#define SO_VM_SOCKETS_TRUSTED 5 + +/* Option name for STREAM socket connection timeout. Use as the option name + * in setsockopt(3) or getsockopt(3) to set or get the connection + * timeout for a STREAM socket. + */ + +#define SO_VM_SOCKETS_CONNECT_TIMEOUT 6 + +/* Option name for using non-blocking send/receive. Use as the option name + * for setsockopt(3) or getsockopt(3) to set or get the non-blocking + * transmit/receive flag for a STREAM socket. This flag determines whether + * send() and recv() can be called in non-blocking contexts for the given + * socket. The value is a signed integer. + * + * This option is only relevant to kernel endpoints, where descheduling the + * thread of execution is not allowed, for example, while holding a spinlock. + * It is not to be confused with conventional non-blocking socket operations. + * + * Only available for hypervisor endpoints. + */ + +#define SO_VM_SOCKETS_NONBLOCK_TXRX 7 + +/* The vSocket equivalent of INADDR_ANY. This works for the svm_cid field of + * sockaddr_vm and indicates the context ID of the current endpoint. + */ + +#define VMADDR_CID_ANY -1U + +/* Bind to any available port. Works for the svm_port field of + * sockaddr_vm. + */ + +#define VMADDR_PORT_ANY -1U + +/* Use this as the destination CID in an address when referring to the + * hypervisor. VMCI relies on it being 0, but this would be useful for other + * transports too. + */ + +#define VMADDR_CID_HYPERVISOR 0 + +/* This CID is specific to VMCI and can be considered reserved (even VMCI + * doesn't use it anymore, it's a legacy value from an older release). + */ + +#define VMADDR_CID_RESERVED 1 + +/* Use this as the destination CID in an address when referring to the host + * (any process other than the hypervisor). VMCI relies on it being 2, but + * this would be useful for other transports too. + */ + +#define VMADDR_CID_HOST 2 + +/* Invalid vSockets version. */ + +#define VM_SOCKETS_INVALID_VERSION -1U + +/* The epoch (first) component of the vSockets version. A single byte + * representing the epoch component of the vSockets version. + */ + +#define VM_SOCKETS_VERSION_EPOCH(_v) (((_v) & 0xFF000000) >> 24) + +/* The major (second) component of the vSockets version. A single byte + * representing the major component of the vSockets version. Typically + * changes for every major release of a product. + */ + +#define VM_SOCKETS_VERSION_MAJOR(_v) (((_v) & 0x00FF0000) >> 16) + +/* The minor (third) component of the vSockets version. Two bytes representing + * the minor component of the vSockets version. + */ + +#define VM_SOCKETS_VERSION_MINOR(_v) (((_v) & 0x0000FFFF)) + +/* Address structure for vSockets. The address family should be set to + * whatever vmci_sock_get_af_value_fd() returns. The structure members should + * all align on their natural boundaries without resorting to compiler packing + * directives. The total size of this structure should be exactly the same as + * that of struct sockaddr. + */ + +struct sockaddr_vm { + sa_family_t svm_family; + unsigned short svm_reserved1; + unsigned int svm_port; + unsigned int svm_cid; + unsigned char svm_zero[sizeof(struct sockaddr) - + sizeof(sa_family_t) - + sizeof(unsigned short) - + sizeof(unsigned int) - sizeof(unsigned int)]; +}; + +#define IOCTL_VM_SOCKETS_GET_LOCAL_CID _IO(7, 0xb9) + +#if defined(__KERNEL__) +int vm_sockets_get_local_cid(void); +#endif + +#endif diff --git a/net/Kconfig b/net/Kconfig index c31348e70aad..5a1888bb036d 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -217,6 +217,7 @@ source "net/dcb/Kconfig" source "net/dns_resolver/Kconfig" source "net/batman-adv/Kconfig" source "net/openvswitch/Kconfig" +source "net/vmw_vsock/Kconfig" config RPS boolean diff --git a/net/Makefile b/net/Makefile index c5aa8b3b49dc..091e7b04f301 100644 --- a/net/Makefile +++ b/net/Makefile @@ -69,3 +69,4 @@ obj-$(CONFIG_CEPH_LIB) += ceph/ obj-$(CONFIG_BATMAN_ADV) += batman-adv/ obj-$(CONFIG_NFC) += nfc/ obj-$(CONFIG_OPENVSWITCH) += openvswitch/ +obj-$(CONFIG_VSOCKETS) += vmw_vsock/ diff --git a/net/vmw_vsock/Kconfig b/net/vmw_vsock/Kconfig new file mode 100644 index 000000000000..b5fa7e40cdcb --- /dev/null +++ b/net/vmw_vsock/Kconfig @@ -0,0 +1,28 @@ +# +# Vsock protocol +# + +config VSOCKETS + tristate "Virtual Socket protocol" + help + Virtual Socket Protocol is a socket protocol similar to TCP/IP + allowing comunication between Virtual Machines and hypervisor + or host. + + You should also select one or more hypervisor-specific transports + below. + + To compile this driver as a module, choose M here: the module + will be called vsock. If unsure, say N. + +config VMWARE_VMCI_VSOCKETS + tristate "VMware VMCI transport for Virtual Sockets" + depends on VSOCKETS && VMWARE_VMCI + help + This module implements a VMCI transport for Virtual Sockets. + + Enable this transport if your Virtual Machine runs on a VMware + hypervisor. + + To compile this driver as a module, choose M here: the module + will be called vmw_vsock_vmci_transport. If unsure, say N. diff --git a/net/vmw_vsock/Makefile b/net/vmw_vsock/Makefile new file mode 100644 index 000000000000..2ce52d70f224 --- /dev/null +++ b/net/vmw_vsock/Makefile @@ -0,0 +1,7 @@ +obj-$(CONFIG_VSOCKETS) += vsock.o +obj-$(CONFIG_VMWARE_VMCI_VSOCKETS) += vmw_vsock_vmci_transport.o + +vsock-y += af_vsock.o vsock_addr.o + +vmw_vsock_vmci_transport-y += vmci_transport.o vmci_transport_notify.o \ + vmci_transport_notify_qstate.o diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c new file mode 100644 index 000000000000..54bb7bdf92d3 --- /dev/null +++ b/net/vmw_vsock/af_vsock.c @@ -0,0 +1,2015 @@ +/* + * VMware vSockets Driver + * + * Copyright (C) 2007-2013 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +/* Implementation notes: + * + * - There are two kinds of sockets: those created by user action (such as + * calling socket(2)) and those created by incoming connection request packets. + * + * - There are two "global" tables, one for bound sockets (sockets that have + * specified an address that they are responsible for) and one for connected + * sockets (sockets that have established a connection with another socket). + * These tables are "global" in that all sockets on the system are placed + * within them. - Note, though, that the bound table contains an extra entry + * for a list of unbound sockets and SOCK_DGRAM sockets will always remain in + * that list. The bound table is used solely for lookup of sockets when packets + * are received and that's not necessary for SOCK_DGRAM sockets since we create + * a datagram handle for each and need not perform a lookup. Keeping SOCK_DGRAM + * sockets out of the bound hash buckets will reduce the chance of collisions + * when looking for SOCK_STREAM sockets and prevents us from having to check the + * socket type in the hash table lookups. + * + * - Sockets created by user action will either be "client" sockets that + * initiate a connection or "server" sockets that listen for connections; we do + * not support simultaneous connects (two "client" sockets connecting). + * + * - "Server" sockets are referred to as listener sockets throughout this + * implementation because they are in the SS_LISTEN state. When a connection + * request is received (the second kind of socket mentioned above), we create a + * new socket and refer to it as a pending socket. These pending sockets are + * placed on the pending connection list of the listener socket. When future + * packets are received for the address the listener socket is bound to, we + * check if the source of the packet is from one that has an existing pending + * connection. If it does, we process the packet for the pending socket. When + * that socket reaches the connected state, it is removed from the listener + * socket's pending list and enqueued in the listener socket's accept queue. + * Callers of accept(2) will accept connected sockets from the listener socket's + * accept queue. If the socket cannot be accepted for some reason then it is + * marked rejected. Once the connection is accepted, it is owned by the user + * process and the responsibility for cleanup falls with that user process. + * + * - It is possible that these pending sockets will never reach the connected + * state; in fact, we may never receive another packet after the connection + * request. Because of this, we must schedule a cleanup function to run in the + * future, after some amount of time passes where a connection should have been + * established. This function ensures that the socket is off all lists so it + * cannot be retrieved, then drops all references to the socket so it is cleaned + * up (sock_put() -> sk_free() -> our sk_destruct implementation). Note this + * function will also cleanup rejected sockets, those that reach the connected + * state but leave it before they have been accepted. + * + * - Sockets created by user action will be cleaned up when the user process + * calls close(2), causing our release implementation to be called. Our release + * implementation will perform some cleanup then drop the last reference so our + * sk_destruct implementation is invoked. Our sk_destruct implementation will + * perform additional cleanup that's common for both types of sockets. + * + * - A socket's reference count is what ensures that the structure won't be + * freed. Each entry in a list (such as the "global" bound and connected tables + * and the listener socket's pending list and connected queue) ensures a + * reference. When we defer work until process context and pass a socket as our + * argument, we must ensure the reference count is increased to ensure the + * socket isn't freed before the function is run; the deferred function will + * then drop the reference. + */ + +#include + +#define EXPORT_SYMTAB +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "af_vsock.h" +#include "vsock_version.h" + +static int __vsock_bind(struct sock *sk, struct sockaddr_vm *addr); +static void vsock_sk_destruct(struct sock *sk); +static int vsock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb); + +/* Protocol family. */ +static struct proto vsock_proto = { + .name = "AF_VSOCK", + .owner = THIS_MODULE, + .obj_size = sizeof(struct vsock_sock), +}; + +/* The default peer timeout indicates how long we will wait for a peer response + * to a control message. + */ +#define VSOCK_DEFAULT_CONNECT_TIMEOUT (2 * HZ) + +#define SS_LISTEN 255 + +static const struct vsock_transport *transport; +static DEFINE_MUTEX(vsock_register_mutex); + +/**** EXPORTS ****/ + +/* Get the ID of the local context. This is transport dependent. */ + +int vm_sockets_get_local_cid(void) +{ + return transport->get_local_cid(); +} +EXPORT_SYMBOL_GPL(vm_sockets_get_local_cid); + +/**** UTILS ****/ + +/* Each bound VSocket is stored in the bind hash table and each connected + * VSocket is stored in the connected hash table. + * + * Unbound sockets are all put on the same list attached to the end of the hash + * table (vsock_unbound_sockets). Bound sockets are added to the hash table in + * the bucket that their local address hashes to (vsock_bound_sockets(addr) + * represents the list that addr hashes to). + * + * Specifically, we initialize the vsock_bind_table array to a size of + * VSOCK_HASH_SIZE + 1 so that vsock_bind_table[0] through + * vsock_bind_table[VSOCK_HASH_SIZE - 1] are for bound sockets and + * vsock_bind_table[VSOCK_HASH_SIZE] is for unbound sockets. The hash function + * mods with VSOCK_HASH_SIZE - 1 to ensure this. + */ +#define VSOCK_HASH_SIZE 251 +#define MAX_PORT_RETRIES 24 + +#define VSOCK_HASH(addr) ((addr)->svm_port % (VSOCK_HASH_SIZE - 1)) +#define vsock_bound_sockets(addr) (&vsock_bind_table[VSOCK_HASH(addr)]) +#define vsock_unbound_sockets (&vsock_bind_table[VSOCK_HASH_SIZE]) + +/* XXX This can probably be implemented in a better way. */ +#define VSOCK_CONN_HASH(src, dst) \ + (((src)->svm_cid ^ (dst)->svm_port) % (VSOCK_HASH_SIZE - 1)) +#define vsock_connected_sockets(src, dst) \ + (&vsock_connected_table[VSOCK_CONN_HASH(src, dst)]) +#define vsock_connected_sockets_vsk(vsk) \ + vsock_connected_sockets(&(vsk)->remote_addr, &(vsk)->local_addr) + +static struct list_head vsock_bind_table[VSOCK_HASH_SIZE + 1]; +static struct list_head vsock_connected_table[VSOCK_HASH_SIZE]; +static DEFINE_SPINLOCK(vsock_table_lock); + +static __init void vsock_init_tables(void) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(vsock_bind_table); i++) + INIT_LIST_HEAD(&vsock_bind_table[i]); + + for (i = 0; i < ARRAY_SIZE(vsock_connected_table); i++) + INIT_LIST_HEAD(&vsock_connected_table[i]); +} + +static void __vsock_insert_bound(struct list_head *list, + struct vsock_sock *vsk) +{ + sock_hold(&vsk->sk); + list_add(&vsk->bound_table, list); +} + +static void __vsock_insert_connected(struct list_head *list, + struct vsock_sock *vsk) +{ + sock_hold(&vsk->sk); + list_add(&vsk->connected_table, list); +} + +static void __vsock_remove_bound(struct vsock_sock *vsk) +{ + list_del_init(&vsk->bound_table); + sock_put(&vsk->sk); +} + +static void __vsock_remove_connected(struct vsock_sock *vsk) +{ + list_del_init(&vsk->connected_table); + sock_put(&vsk->sk); +} + +static struct sock *__vsock_find_bound_socket(struct sockaddr_vm *addr) +{ + struct vsock_sock *vsk; + + list_for_each_entry(vsk, vsock_bound_sockets(addr), bound_table) + if (vsock_addr_equals_addr_any(addr, &vsk->local_addr)) + return sk_vsock(vsk); + + return NULL; +} + +static struct sock *__vsock_find_connected_socket(struct sockaddr_vm *src, + struct sockaddr_vm *dst) +{ + struct vsock_sock *vsk; + + list_for_each_entry(vsk, vsock_connected_sockets(src, dst), + connected_table) { + if (vsock_addr_equals_addr(src, &vsk->remote_addr) + && vsock_addr_equals_addr(dst, &vsk->local_addr)) { + return sk_vsock(vsk); + } + } + + return NULL; +} + +static bool __vsock_in_bound_table(struct vsock_sock *vsk) +{ + return !list_empty(&vsk->bound_table); +} + +static bool __vsock_in_connected_table(struct vsock_sock *vsk) +{ + return !list_empty(&vsk->connected_table); +} + +static void vsock_insert_unbound(struct vsock_sock *vsk) +{ + spin_lock_bh(&vsock_table_lock); + __vsock_insert_bound(vsock_unbound_sockets, vsk); + spin_unlock_bh(&vsock_table_lock); +} + +void vsock_insert_connected(struct vsock_sock *vsk) +{ + struct list_head *list = vsock_connected_sockets( + &vsk->remote_addr, &vsk->local_addr); + + spin_lock_bh(&vsock_table_lock); + __vsock_insert_connected(list, vsk); + spin_unlock_bh(&vsock_table_lock); +} +EXPORT_SYMBOL_GPL(vsock_insert_connected); + +void vsock_remove_bound(struct vsock_sock *vsk) +{ + spin_lock_bh(&vsock_table_lock); + __vsock_remove_bound(vsk); + spin_unlock_bh(&vsock_table_lock); +} +EXPORT_SYMBOL_GPL(vsock_remove_bound); + +void vsock_remove_connected(struct vsock_sock *vsk) +{ + spin_lock_bh(&vsock_table_lock); + __vsock_remove_connected(vsk); + spin_unlock_bh(&vsock_table_lock); +} +EXPORT_SYMBOL_GPL(vsock_remove_connected); + +struct sock *vsock_find_bound_socket(struct sockaddr_vm *addr) +{ + struct sock *sk; + + spin_lock_bh(&vsock_table_lock); + sk = __vsock_find_bound_socket(addr); + if (sk) + sock_hold(sk); + + spin_unlock_bh(&vsock_table_lock); + + return sk; +} +EXPORT_SYMBOL_GPL(vsock_find_bound_socket); + +struct sock *vsock_find_connected_socket(struct sockaddr_vm *src, + struct sockaddr_vm *dst) +{ + struct sock *sk; + + spin_lock_bh(&vsock_table_lock); + sk = __vsock_find_connected_socket(src, dst); + if (sk) + sock_hold(sk); + + spin_unlock_bh(&vsock_table_lock); + + return sk; +} +EXPORT_SYMBOL_GPL(vsock_find_connected_socket); + +static bool vsock_in_bound_table(struct vsock_sock *vsk) +{ + bool ret; + + spin_lock_bh(&vsock_table_lock); + ret = __vsock_in_bound_table(vsk); + spin_unlock_bh(&vsock_table_lock); + + return ret; +} + +static bool vsock_in_connected_table(struct vsock_sock *vsk) +{ + bool ret; + + spin_lock_bh(&vsock_table_lock); + ret = __vsock_in_connected_table(vsk); + spin_unlock_bh(&vsock_table_lock); + + return ret; +} + +void vsock_for_each_connected_socket(void (*fn)(struct sock *sk)) +{ + int i; + + spin_lock_bh(&vsock_table_lock); + + for (i = 0; i < ARRAY_SIZE(vsock_connected_table); i++) { + struct vsock_sock *vsk; + list_for_each_entry(vsk, &vsock_connected_table[i], + connected_table); + fn(sk_vsock(vsk)); + } + + spin_unlock_bh(&vsock_table_lock); +} +EXPORT_SYMBOL_GPL(vsock_for_each_connected_socket); + +void vsock_add_pending(struct sock *listener, struct sock *pending) +{ + struct vsock_sock *vlistener; + struct vsock_sock *vpending; + + vlistener = vsock_sk(listener); + vpending = vsock_sk(pending); + + sock_hold(pending); + sock_hold(listener); + list_add_tail(&vpending->pending_links, &vlistener->pending_links); +} +EXPORT_SYMBOL_GPL(vsock_add_pending); + +void vsock_remove_pending(struct sock *listener, struct sock *pending) +{ + struct vsock_sock *vpending = vsock_sk(pending); + + list_del_init(&vpending->pending_links); + sock_put(listener); + sock_put(pending); +} +EXPORT_SYMBOL_GPL(vsock_remove_pending); + +void vsock_enqueue_accept(struct sock *listener, struct sock *connected) +{ + struct vsock_sock *vlistener; + struct vsock_sock *vconnected; + + vlistener = vsock_sk(listener); + vconnected = vsock_sk(connected); + + sock_hold(connected); + sock_hold(listener); + list_add_tail(&vconnected->accept_queue, &vlistener->accept_queue); +} +EXPORT_SYMBOL_GPL(vsock_enqueue_accept); + +static struct sock *vsock_dequeue_accept(struct sock *listener) +{ + struct vsock_sock *vlistener; + struct vsock_sock *vconnected; + + vlistener = vsock_sk(listener); + + if (list_empty(&vlistener->accept_queue)) + return NULL; + + vconnected = list_entry(vlistener->accept_queue.next, + struct vsock_sock, accept_queue); + + list_del_init(&vconnected->accept_queue); + sock_put(listener); + /* The caller will need a reference on the connected socket so we let + * it call sock_put(). + */ + + return sk_vsock(vconnected); +} + +static bool vsock_is_accept_queue_empty(struct sock *sk) +{ + struct vsock_sock *vsk = vsock_sk(sk); + return list_empty(&vsk->accept_queue); +} + +static bool vsock_is_pending(struct sock *sk) +{ + struct vsock_sock *vsk = vsock_sk(sk); + return !list_empty(&vsk->pending_links); +} + +static int vsock_send_shutdown(struct sock *sk, int mode) +{ + return transport->shutdown(vsock_sk(sk), mode); +} + +void vsock_pending_work(struct work_struct *work) +{ + struct sock *sk; + struct sock *listener; + struct vsock_sock *vsk; + bool cleanup; + + vsk = container_of(work, struct vsock_sock, dwork.work); + sk = sk_vsock(vsk); + listener = vsk->listener; + cleanup = true; + + lock_sock(listener); + lock_sock(sk); + + if (vsock_is_pending(sk)) { + vsock_remove_pending(listener, sk); + } else if (!vsk->rejected) { + /* We are not on the pending list and accept() did not reject + * us, so we must have been accepted by our user process. We + * just need to drop our references to the sockets and be on + * our way. + */ + cleanup = false; + goto out; + } + + listener->sk_ack_backlog--; + + /* We need to remove ourself from the global connected sockets list so + * incoming packets can't find this socket, and to reduce the reference + * count. + */ + if (vsock_in_connected_table(vsk)) + vsock_remove_connected(vsk); + + sk->sk_state = SS_FREE; + +out: + release_sock(sk); + release_sock(listener); + if (cleanup) + sock_put(sk); + + sock_put(sk); + sock_put(listener); +} +EXPORT_SYMBOL_GPL(vsock_pending_work); + +/**** SOCKET OPERATIONS ****/ + +static int __vsock_bind_stream(struct vsock_sock *vsk, + struct sockaddr_vm *addr) +{ + static u32 port = LAST_RESERVED_PORT + 1; + struct sockaddr_vm new_addr; + + vsock_addr_init(&new_addr, addr->svm_cid, addr->svm_port); + + if (addr->svm_port == VMADDR_PORT_ANY) { + bool found = false; + unsigned int i; + + for (i = 0; i < MAX_PORT_RETRIES; i++) { + if (port <= LAST_RESERVED_PORT) + port = LAST_RESERVED_PORT + 1; + + new_addr.svm_port = port++; + + if (!__vsock_find_bound_socket(&new_addr)) { + found = true; + break; + } + } + + if (!found) + return -EADDRNOTAVAIL; + } else { + /* If port is in reserved range, ensure caller + * has necessary privileges. + */ + if (addr->svm_port <= LAST_RESERVED_PORT && + !capable(CAP_NET_BIND_SERVICE)) { + return -EACCES; + } + + if (__vsock_find_bound_socket(&new_addr)) + return -EADDRINUSE; + } + + vsock_addr_init(&vsk->local_addr, new_addr.svm_cid, new_addr.svm_port); + + /* Remove stream sockets from the unbound list and add them to the hash + * table for easy lookup by its address. The unbound list is simply an + * extra entry at the end of the hash table, a trick used by AF_UNIX. + */ + __vsock_remove_bound(vsk); + __vsock_insert_bound(vsock_bound_sockets(&vsk->local_addr), vsk); + + return 0; +} + +static int __vsock_bind_dgram(struct vsock_sock *vsk, + struct sockaddr_vm *addr) +{ + return transport->dgram_bind(vsk, addr); +} + +static int __vsock_bind(struct sock *sk, struct sockaddr_vm *addr) +{ + struct vsock_sock *vsk = vsock_sk(sk); + u32 cid; + int retval; + + /* First ensure this socket isn't already bound. */ + if (vsock_addr_bound(&vsk->local_addr)) + return -EINVAL; + + /* Now bind to the provided address or select appropriate values if + * none are provided (VMADDR_CID_ANY and VMADDR_PORT_ANY). Note that + * like AF_INET prevents binding to a non-local IP address (in most + * cases), we only allow binding to the local CID. + */ + cid = transport->get_local_cid(); + if (addr->svm_cid != cid && addr->svm_cid != VMADDR_CID_ANY) + return -EADDRNOTAVAIL; + + switch (sk->sk_socket->type) { + case SOCK_STREAM: + spin_lock_bh(&vsock_table_lock); + retval = __vsock_bind_stream(vsk, addr); + spin_unlock_bh(&vsock_table_lock); + break; + + case SOCK_DGRAM: + retval = __vsock_bind_dgram(vsk, addr); + break; + + default: + retval = -EINVAL; + break; + } + + return retval; +} + +struct sock *__vsock_create(struct net *net, + struct socket *sock, + struct sock *parent, + gfp_t priority, + unsigned short type) +{ + struct sock *sk; + struct vsock_sock *psk; + struct vsock_sock *vsk; + + sk = sk_alloc(net, AF_VSOCK, priority, &vsock_proto); + if (!sk) + return NULL; + + sock_init_data(sock, sk); + + /* sk->sk_type is normally set in sock_init_data, but only if sock is + * non-NULL. We make sure that our sockets always have a type by + * setting it here if needed. + */ + if (!sock) + sk->sk_type = type; + + vsk = vsock_sk(sk); + vsock_addr_init(&vsk->local_addr, VMADDR_CID_ANY, VMADDR_PORT_ANY); + vsock_addr_init(&vsk->remote_addr, VMADDR_CID_ANY, VMADDR_PORT_ANY); + + sk->sk_destruct = vsock_sk_destruct; + sk->sk_backlog_rcv = vsock_queue_rcv_skb; + sk->sk_state = 0; + sock_reset_flag(sk, SOCK_DONE); + + INIT_LIST_HEAD(&vsk->bound_table); + INIT_LIST_HEAD(&vsk->connected_table); + vsk->listener = NULL; + INIT_LIST_HEAD(&vsk->pending_links); + INIT_LIST_HEAD(&vsk->accept_queue); + vsk->rejected = false; + vsk->sent_request = false; + vsk->ignore_connecting_rst = false; + vsk->peer_shutdown = 0; + + psk = parent ? vsock_sk(parent) : NULL; + if (parent) { + vsk->trusted = psk->trusted; + vsk->owner = get_cred(psk->owner); + vsk->connect_timeout = psk->connect_timeout; + } else { + vsk->trusted = capable(CAP_NET_ADMIN); + vsk->owner = get_current_cred(); + vsk->connect_timeout = VSOCK_DEFAULT_CONNECT_TIMEOUT; + } + + if (transport->init(vsk, psk) < 0) { + sk_free(sk); + return NULL; + } + + if (sock) + vsock_insert_unbound(vsk); + + return sk; +} +EXPORT_SYMBOL_GPL(__vsock_create); + +static void __vsock_release(struct sock *sk) +{ + if (sk) { + struct sk_buff *skb; + struct sock *pending; + struct vsock_sock *vsk; + + vsk = vsock_sk(sk); + pending = NULL; /* Compiler warning. */ + + if (vsock_in_bound_table(vsk)) + vsock_remove_bound(vsk); + + if (vsock_in_connected_table(vsk)) + vsock_remove_connected(vsk); + + transport->release(vsk); + + lock_sock(sk); + sock_orphan(sk); + sk->sk_shutdown = SHUTDOWN_MASK; + + while ((skb = skb_dequeue(&sk->sk_receive_queue))) + kfree_skb(skb); + + /* Clean up any sockets that never were accepted. */ + while ((pending = vsock_dequeue_accept(sk)) != NULL) { + __vsock_release(pending); + sock_put(pending); + } + + release_sock(sk); + sock_put(sk); + } +} + +static void vsock_sk_destruct(struct sock *sk) +{ + struct vsock_sock *vsk = vsock_sk(sk); + + transport->destruct(vsk); + + /* When clearing these addresses, there's no need to set the family and + * possibly register the address family with the kernel. + */ + vsock_addr_init(&vsk->local_addr, VMADDR_CID_ANY, VMADDR_PORT_ANY); + vsock_addr_init(&vsk->remote_addr, VMADDR_CID_ANY, VMADDR_PORT_ANY); + + put_cred(vsk->owner); +} + +static int vsock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) +{ + int err; + + err = sock_queue_rcv_skb(sk, skb); + if (err) + kfree_skb(skb); + + return err; +} + +s64 vsock_stream_has_data(struct vsock_sock *vsk) +{ + return transport->stream_has_data(vsk); +} +EXPORT_SYMBOL_GPL(vsock_stream_has_data); + +s64 vsock_stream_has_space(struct vsock_sock *vsk) +{ + return transport->stream_has_space(vsk); +} +EXPORT_SYMBOL_GPL(vsock_stream_has_space); + +static int vsock_release(struct socket *sock) +{ + __vsock_release(sock->sk); + sock->sk = NULL; + sock->state = SS_FREE; + + return 0; +} + +static int +vsock_bind(struct socket *sock, struct sockaddr *addr, int addr_len) +{ + int err; + struct sock *sk; + struct sockaddr_vm *vm_addr; + + sk = sock->sk; + + if (vsock_addr_cast(addr, addr_len, &vm_addr) != 0) + return -EINVAL; + + lock_sock(sk); + err = __vsock_bind(sk, vm_addr); + release_sock(sk); + + return err; +} + +static int vsock_getname(struct socket *sock, + struct sockaddr *addr, int *addr_len, int peer) +{ + int err; + struct sock *sk; + struct vsock_sock *vsk; + struct sockaddr_vm *vm_addr; + + sk = sock->sk; + vsk = vsock_sk(sk); + err = 0; + + lock_sock(sk); + + if (peer) { + if (sock->state != SS_CONNECTED) { + err = -ENOTCONN; + goto out; + } + vm_addr = &vsk->remote_addr; + } else { + vm_addr = &vsk->local_addr; + } + + if (!vm_addr) { + err = -EINVAL; + goto out; + } + + /* sys_getsockname() and sys_getpeername() pass us a + * MAX_SOCK_ADDR-sized buffer and don't set addr_len. Unfortunately + * that macro is defined in socket.c instead of .h, so we hardcode its + * value here. + */ + BUILD_BUG_ON(sizeof(*vm_addr) > 128); + memcpy(addr, vm_addr, sizeof(*vm_addr)); + *addr_len = sizeof(*vm_addr); + +out: + release_sock(sk); + return err; +} + +static int vsock_shutdown(struct socket *sock, int mode) +{ + int err; + struct sock *sk; + + /* User level uses SHUT_RD (0) and SHUT_WR (1), but the kernel uses + * RCV_SHUTDOWN (1) and SEND_SHUTDOWN (2), so we must increment mode + * here like the other address families do. Note also that the + * increment makes SHUT_RDWR (2) into RCV_SHUTDOWN | SEND_SHUTDOWN (3), + * which is what we want. + */ + mode++; + + if ((mode & ~SHUTDOWN_MASK) || !mode) + return -EINVAL; + + /* If this is a STREAM socket and it is not connected then bail out + * immediately. If it is a DGRAM socket then we must first kick the + * socket so that it wakes up from any sleeping calls, for example + * recv(), and then afterwards return the error. + */ + + sk = sock->sk; + if (sock->state == SS_UNCONNECTED) { + err = -ENOTCONN; + if (sk->sk_type == SOCK_STREAM) + return err; + } else { + sock->state = SS_DISCONNECTING; + err = 0; + } + + /* Receive and send shutdowns are treated alike. */ + mode = mode & (RCV_SHUTDOWN | SEND_SHUTDOWN); + if (mode) { + lock_sock(sk); + sk->sk_shutdown |= mode; + sk->sk_state_change(sk); + release_sock(sk); + + if (sk->sk_type == SOCK_STREAM) { + sock_reset_flag(sk, SOCK_DONE); + vsock_send_shutdown(sk, mode); + } + } + + return err; +} + +static unsigned int vsock_poll(struct file *file, struct socket *sock, + poll_table *wait) +{ + struct sock *sk; + unsigned int mask; + struct vsock_sock *vsk; + + sk = sock->sk; + vsk = vsock_sk(sk); + + poll_wait(file, sk_sleep(sk), wait); + mask = 0; + + if (sk->sk_err) + /* Signify that there has been an error on this socket. */ + mask |= POLLERR; + + /* INET sockets treat local write shutdown and peer write shutdown as a + * case of POLLHUP set. + */ + if ((sk->sk_shutdown == SHUTDOWN_MASK) || + ((sk->sk_shutdown & SEND_SHUTDOWN) && + (vsk->peer_shutdown & SEND_SHUTDOWN))) { + mask |= POLLHUP; + } + + if (sk->sk_shutdown & RCV_SHUTDOWN || + vsk->peer_shutdown & SEND_SHUTDOWN) { + mask |= POLLRDHUP; + } + + if (sock->type == SOCK_DGRAM) { + /* For datagram sockets we can read if there is something in + * the queue and write as long as the socket isn't shutdown for + * sending. + */ + if (!skb_queue_empty(&sk->sk_receive_queue) || + (sk->sk_shutdown & RCV_SHUTDOWN)) { + mask |= POLLIN | POLLRDNORM; + } + + if (!(sk->sk_shutdown & SEND_SHUTDOWN)) + mask |= POLLOUT | POLLWRNORM | POLLWRBAND; + + } else if (sock->type == SOCK_STREAM) { + lock_sock(sk); + + /* Listening sockets that have connections in their accept + * queue can be read. + */ + if (sk->sk_state == SS_LISTEN + && !vsock_is_accept_queue_empty(sk)) + mask |= POLLIN | POLLRDNORM; + + /* If there is something in the queue then we can read. */ + if (transport->stream_is_active(vsk) && + !(sk->sk_shutdown & RCV_SHUTDOWN)) { + bool data_ready_now = false; + int ret = transport->notify_poll_in( + vsk, 1, &data_ready_now); + if (ret < 0) { + mask |= POLLERR; + } else { + if (data_ready_now) + mask |= POLLIN | POLLRDNORM; + + } + } + + /* Sockets whose connections have been closed, reset, or + * terminated should also be considered read, and we check the + * shutdown flag for that. + */ + if (sk->sk_shutdown & RCV_SHUTDOWN || + vsk->peer_shutdown & SEND_SHUTDOWN) { + mask |= POLLIN | POLLRDNORM; + } + + /* Connected sockets that can produce data can be written. */ + if (sk->sk_state == SS_CONNECTED) { + if (!(sk->sk_shutdown & SEND_SHUTDOWN)) { + bool space_avail_now = false; + int ret = transport->notify_poll_out( + vsk, 1, &space_avail_now); + if (ret < 0) { + mask |= POLLERR; + } else { + if (space_avail_now) + /* Remove POLLWRBAND since INET + * sockets are not setting it. + */ + mask |= POLLOUT | POLLWRNORM; + + } + } + } + + /* Simulate INET socket poll behaviors, which sets + * POLLOUT|POLLWRNORM when peer is closed and nothing to read, + * but local send is not shutdown. + */ + if (sk->sk_state == SS_UNCONNECTED) { + if (!(sk->sk_shutdown & SEND_SHUTDOWN)) + mask |= POLLOUT | POLLWRNORM; + + } + + release_sock(sk); + } + + return mask; +} + +static int vsock_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock, + struct msghdr *msg, size_t len) +{ + int err; + struct sock *sk; + struct vsock_sock *vsk; + struct sockaddr_vm *remote_addr; + + if (msg->msg_flags & MSG_OOB) + return -EOPNOTSUPP; + + /* For now, MSG_DONTWAIT is always assumed... */ + err = 0; + sk = sock->sk; + vsk = vsock_sk(sk); + + lock_sock(sk); + + if (!vsock_addr_bound(&vsk->local_addr)) { + struct sockaddr_vm local_addr; + + vsock_addr_init(&local_addr, VMADDR_CID_ANY, VMADDR_PORT_ANY); + err = __vsock_bind(sk, &local_addr); + if (err != 0) + goto out; + + } + + /* If the provided message contains an address, use that. Otherwise + * fall back on the socket's remote handle (if it has been connected). + */ + if (msg->msg_name && + vsock_addr_cast(msg->msg_name, msg->msg_namelen, + &remote_addr) == 0) { + /* Ensure this address is of the right type and is a valid + * destination. + */ + + if (remote_addr->svm_cid == VMADDR_CID_ANY) + remote_addr->svm_cid = transport->get_local_cid(); + + if (!vsock_addr_bound(remote_addr)) { + err = -EINVAL; + goto out; + } + } else if (sock->state == SS_CONNECTED) { + remote_addr = &vsk->remote_addr; + + if (remote_addr->svm_cid == VMADDR_CID_ANY) + remote_addr->svm_cid = transport->get_local_cid(); + + /* XXX Should connect() or this function ensure remote_addr is + * bound? + */ + if (!vsock_addr_bound(&vsk->remote_addr)) { + err = -EINVAL; + goto out; + } + } else { + err = -EINVAL; + goto out; + } + + if (!transport->dgram_allow(remote_addr->svm_cid, + remote_addr->svm_port)) { + err = -EINVAL; + goto out; + } + + err = transport->dgram_enqueue(vsk, remote_addr, msg->msg_iov, len); + +out: + release_sock(sk); + return err; +} + +static int vsock_dgram_connect(struct socket *sock, + struct sockaddr *addr, int addr_len, int flags) +{ + int err; + struct sock *sk; + struct vsock_sock *vsk; + struct sockaddr_vm *remote_addr; + + sk = sock->sk; + vsk = vsock_sk(sk); + + err = vsock_addr_cast(addr, addr_len, &remote_addr); + if (err == -EAFNOSUPPORT && remote_addr->svm_family == AF_UNSPEC) { + lock_sock(sk); + vsock_addr_init(&vsk->remote_addr, VMADDR_CID_ANY, + VMADDR_PORT_ANY); + sock->state = SS_UNCONNECTED; + release_sock(sk); + return 0; + } else if (err != 0) + return -EINVAL; + + lock_sock(sk); + + if (!vsock_addr_bound(&vsk->local_addr)) { + struct sockaddr_vm local_addr; + + vsock_addr_init(&local_addr, VMADDR_CID_ANY, VMADDR_PORT_ANY); + err = __vsock_bind(sk, &local_addr); + if (err != 0) + goto out; + + } + + if (!transport->dgram_allow(remote_addr->svm_cid, + remote_addr->svm_port)) { + err = -EINVAL; + goto out; + } + + memcpy(&vsk->remote_addr, remote_addr, sizeof(vsk->remote_addr)); + sock->state = SS_CONNECTED; + +out: + release_sock(sk); + return err; +} + +static int vsock_dgram_recvmsg(struct kiocb *kiocb, struct socket *sock, + struct msghdr *msg, size_t len, int flags) +{ + return transport->dgram_dequeue(kiocb, vsock_sk(sock->sk), msg, len, + flags); +} + +static const struct proto_ops vsock_dgram_ops = { + .family = PF_VSOCK, + .owner = THIS_MODULE, + .release = vsock_release, + .bind = vsock_bind, + .connect = vsock_dgram_connect, + .socketpair = sock_no_socketpair, + .accept = sock_no_accept, + .getname = vsock_getname, + .poll = vsock_poll, + .ioctl = sock_no_ioctl, + .listen = sock_no_listen, + .shutdown = vsock_shutdown, + .setsockopt = sock_no_setsockopt, + .getsockopt = sock_no_getsockopt, + .sendmsg = vsock_dgram_sendmsg, + .recvmsg = vsock_dgram_recvmsg, + .mmap = sock_no_mmap, + .sendpage = sock_no_sendpage, +}; + +static void vsock_connect_timeout(struct work_struct *work) +{ + struct sock *sk; + struct vsock_sock *vsk; + + vsk = container_of(work, struct vsock_sock, dwork.work); + sk = sk_vsock(vsk); + + lock_sock(sk); + if (sk->sk_state == SS_CONNECTING && + (sk->sk_shutdown != SHUTDOWN_MASK)) { + sk->sk_state = SS_UNCONNECTED; + sk->sk_err = ETIMEDOUT; + sk->sk_error_report(sk); + } + release_sock(sk); + + sock_put(sk); +} + +static int vsock_stream_connect(struct socket *sock, struct sockaddr *addr, + int addr_len, int flags) +{ + int err; + struct sock *sk; + struct vsock_sock *vsk; + struct sockaddr_vm *remote_addr; + long timeout; + DEFINE_WAIT(wait); + + err = 0; + sk = sock->sk; + vsk = vsock_sk(sk); + + lock_sock(sk); + + /* XXX AF_UNSPEC should make us disconnect like AF_INET. */ + switch (sock->state) { + case SS_CONNECTED: + err = -EISCONN; + goto out; + case SS_DISCONNECTING: + err = -EINVAL; + goto out; + case SS_CONNECTING: + /* This continues on so we can move sock into the SS_CONNECTED + * state once the connection has completed (at which point err + * will be set to zero also). Otherwise, we will either wait + * for the connection or return -EALREADY should this be a + * non-blocking call. + */ + err = -EALREADY; + break; + default: + if ((sk->sk_state == SS_LISTEN) || + vsock_addr_cast(addr, addr_len, &remote_addr) != 0) { + err = -EINVAL; + goto out; + } + + /* The hypervisor and well-known contexts do not have socket + * endpoints. + */ + if (!transport->stream_allow(remote_addr->svm_cid, + remote_addr->svm_port)) { + err = -ENETUNREACH; + goto out; + } + + /* Set the remote address that we are connecting to. */ + memcpy(&vsk->remote_addr, remote_addr, + sizeof(vsk->remote_addr)); + + /* Autobind this socket to the local address if necessary. */ + if (!vsock_addr_bound(&vsk->local_addr)) { + struct sockaddr_vm local_addr; + + vsock_addr_init(&local_addr, VMADDR_CID_ANY, + VMADDR_PORT_ANY); + err = __vsock_bind(sk, &local_addr); + if (err != 0) + goto out; + + } + + sk->sk_state = SS_CONNECTING; + + err = transport->connect(vsk); + if (err < 0) + goto out; + + /* Mark sock as connecting and set the error code to in + * progress in case this is a non-blocking connect. + */ + sock->state = SS_CONNECTING; + err = -EINPROGRESS; + } + + /* The receive path will handle all communication until we are able to + * enter the connected state. Here we wait for the connection to be + * completed or a notification of an error. + */ + timeout = vsk->connect_timeout; + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); + + while (sk->sk_state != SS_CONNECTED && sk->sk_err == 0) { + if (flags & O_NONBLOCK) { + /* If we're not going to block, we schedule a timeout + * function to generate a timeout on the connection + * attempt, in case the peer doesn't respond in a + * timely manner. We hold on to the socket until the + * timeout fires. + */ + sock_hold(sk); + INIT_DELAYED_WORK(&vsk->dwork, + vsock_connect_timeout); + schedule_delayed_work(&vsk->dwork, timeout); + + /* Skip ahead to preserve error code set above. */ + goto out_wait; + } + + release_sock(sk); + timeout = schedule_timeout(timeout); + lock_sock(sk); + + if (signal_pending(current)) { + err = sock_intr_errno(timeout); + goto out_wait_error; + } else if (timeout == 0) { + err = -ETIMEDOUT; + goto out_wait_error; + } + + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); + } + + if (sk->sk_err) { + err = -sk->sk_err; + goto out_wait_error; + } else + err = 0; + +out_wait: + finish_wait(sk_sleep(sk), &wait); +out: + release_sock(sk); + return err; + +out_wait_error: + sk->sk_state = SS_UNCONNECTED; + sock->state = SS_UNCONNECTED; + goto out_wait; +} + +static int vsock_accept(struct socket *sock, struct socket *newsock, int flags) +{ + struct sock *listener; + int err; + struct sock *connected; + struct vsock_sock *vconnected; + long timeout; + DEFINE_WAIT(wait); + + err = 0; + listener = sock->sk; + + lock_sock(listener); + + if (sock->type != SOCK_STREAM) { + err = -EOPNOTSUPP; + goto out; + } + + if (listener->sk_state != SS_LISTEN) { + err = -EINVAL; + goto out; + } + + /* Wait for children sockets to appear; these are the new sockets + * created upon connection establishment. + */ + timeout = sock_sndtimeo(listener, flags & O_NONBLOCK); + prepare_to_wait(sk_sleep(listener), &wait, TASK_INTERRUPTIBLE); + + while ((connected = vsock_dequeue_accept(listener)) == NULL && + listener->sk_err == 0) { + release_sock(listener); + timeout = schedule_timeout(timeout); + lock_sock(listener); + + if (signal_pending(current)) { + err = sock_intr_errno(timeout); + goto out_wait; + } else if (timeout == 0) { + err = -EAGAIN; + goto out_wait; + } + + prepare_to_wait(sk_sleep(listener), &wait, TASK_INTERRUPTIBLE); + } + + if (listener->sk_err) + err = -listener->sk_err; + + if (connected) { + listener->sk_ack_backlog--; + + lock_sock(connected); + vconnected = vsock_sk(connected); + + /* If the listener socket has received an error, then we should + * reject this socket and return. Note that we simply mark the + * socket rejected, drop our reference, and let the cleanup + * function handle the cleanup; the fact that we found it in + * the listener's accept queue guarantees that the cleanup + * function hasn't run yet. + */ + if (err) { + vconnected->rejected = true; + release_sock(connected); + sock_put(connected); + goto out_wait; + } + + newsock->state = SS_CONNECTED; + sock_graft(connected, newsock); + release_sock(connected); + sock_put(connected); + } + +out_wait: + finish_wait(sk_sleep(listener), &wait); +out: + release_sock(listener); + return err; +} + +static int vsock_listen(struct socket *sock, int backlog) +{ + int err; + struct sock *sk; + struct vsock_sock *vsk; + + sk = sock->sk; + + lock_sock(sk); + + if (sock->type != SOCK_STREAM) { + err = -EOPNOTSUPP; + goto out; + } + + if (sock->state != SS_UNCONNECTED) { + err = -EINVAL; + goto out; + } + + vsk = vsock_sk(sk); + + if (!vsock_addr_bound(&vsk->local_addr)) { + err = -EINVAL; + goto out; + } + + sk->sk_max_ack_backlog = backlog; + sk->sk_state = SS_LISTEN; + + err = 0; + +out: + release_sock(sk); + return err; +} + +static int vsock_stream_setsockopt(struct socket *sock, + int level, + int optname, + char __user *optval, + unsigned int optlen) +{ + int err; + struct sock *sk; + struct vsock_sock *vsk; + u64 val; + + if (level != AF_VSOCK) + return -ENOPROTOOPT; + +#define COPY_IN(_v) \ + do { \ + if (optlen < sizeof(_v)) { \ + err = -EINVAL; \ + goto exit; \ + } \ + if (copy_from_user(&_v, optval, sizeof(_v)) != 0) { \ + err = -EFAULT; \ + goto exit; \ + } \ + } while (0) + + err = 0; + sk = sock->sk; + vsk = vsock_sk(sk); + + lock_sock(sk); + + switch (optname) { + case SO_VM_SOCKETS_BUFFER_SIZE: + COPY_IN(val); + transport->set_buffer_size(vsk, val); + break; + + case SO_VM_SOCKETS_BUFFER_MAX_SIZE: + COPY_IN(val); + transport->set_max_buffer_size(vsk, val); + break; + + case SO_VM_SOCKETS_BUFFER_MIN_SIZE: + COPY_IN(val); + transport->set_min_buffer_size(vsk, val); + break; + + case SO_VM_SOCKETS_CONNECT_TIMEOUT: { + struct timeval tv; + COPY_IN(tv); + if (tv.tv_sec >= 0 && tv.tv_usec < USEC_PER_SEC && + tv.tv_sec < (MAX_SCHEDULE_TIMEOUT / HZ - 1)) { + vsk->connect_timeout = tv.tv_sec * HZ + + DIV_ROUND_UP(tv.tv_usec, (1000000 / HZ)); + if (vsk->connect_timeout == 0) + vsk->connect_timeout = + VSOCK_DEFAULT_CONNECT_TIMEOUT; + + } else { + err = -ERANGE; + } + break; + } + + default: + err = -ENOPROTOOPT; + break; + } + +#undef COPY_IN + +exit: + release_sock(sk); + return err; +} + +static int vsock_stream_getsockopt(struct socket *sock, + int level, int optname, + char __user *optval, + int __user *optlen) +{ + int err; + int len; + struct sock *sk; + struct vsock_sock *vsk; + u64 val; + + if (level != AF_VSOCK) + return -ENOPROTOOPT; + + err = get_user(len, optlen); + if (err != 0) + return err; + +#define COPY_OUT(_v) \ + do { \ + if (len < sizeof(_v)) \ + return -EINVAL; \ + \ + len = sizeof(_v); \ + if (copy_to_user(optval, &_v, len) != 0) \ + return -EFAULT; \ + \ + } while (0) + + err = 0; + sk = sock->sk; + vsk = vsock_sk(sk); + + switch (optname) { + case SO_VM_SOCKETS_BUFFER_SIZE: + val = transport->get_buffer_size(vsk); + COPY_OUT(val); + break; + + case SO_VM_SOCKETS_BUFFER_MAX_SIZE: + val = transport->get_max_buffer_size(vsk); + COPY_OUT(val); + break; + + case SO_VM_SOCKETS_BUFFER_MIN_SIZE: + val = transport->get_min_buffer_size(vsk); + COPY_OUT(val); + break; + + case SO_VM_SOCKETS_CONNECT_TIMEOUT: { + struct timeval tv; + tv.tv_sec = vsk->connect_timeout / HZ; + tv.tv_usec = + (vsk->connect_timeout - + tv.tv_sec * HZ) * (1000000 / HZ); + COPY_OUT(tv); + break; + } + default: + return -ENOPROTOOPT; + } + + err = put_user(len, optlen); + if (err != 0) + return -EFAULT; + +#undef COPY_OUT + + return 0; +} + +static int vsock_stream_sendmsg(struct kiocb *kiocb, struct socket *sock, + struct msghdr *msg, size_t len) +{ + struct sock *sk; + struct vsock_sock *vsk; + ssize_t total_written; + long timeout; + int err; + struct vsock_transport_send_notify_data send_data; + + DEFINE_WAIT(wait); + + sk = sock->sk; + vsk = vsock_sk(sk); + total_written = 0; + err = 0; + + if (msg->msg_flags & MSG_OOB) + return -EOPNOTSUPP; + + lock_sock(sk); + + /* Callers should not provide a destination with stream sockets. */ + if (msg->msg_namelen) { + err = sk->sk_state == SS_CONNECTED ? -EISCONN : -EOPNOTSUPP; + goto out; + } + + /* Send data only if both sides are not shutdown in the direction. */ + if (sk->sk_shutdown & SEND_SHUTDOWN || + vsk->peer_shutdown & RCV_SHUTDOWN) { + err = -EPIPE; + goto out; + } + + if (sk->sk_state != SS_CONNECTED || + !vsock_addr_bound(&vsk->local_addr)) { + err = -ENOTCONN; + goto out; + } + + if (!vsock_addr_bound(&vsk->remote_addr)) { + err = -EDESTADDRREQ; + goto out; + } + + /* Wait for room in the produce queue to enqueue our user's data. */ + timeout = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT); + + err = transport->notify_send_init(vsk, &send_data); + if (err < 0) + goto out; + + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); + + while (total_written < len) { + ssize_t written; + + while (vsock_stream_has_space(vsk) == 0 && + sk->sk_err == 0 && + !(sk->sk_shutdown & SEND_SHUTDOWN) && + !(vsk->peer_shutdown & RCV_SHUTDOWN)) { + + /* Don't wait for non-blocking sockets. */ + if (timeout == 0) { + err = -EAGAIN; + goto out_wait; + } + + err = transport->notify_send_pre_block(vsk, &send_data); + if (err < 0) + goto out_wait; + + release_sock(sk); + timeout = schedule_timeout(timeout); + lock_sock(sk); + if (signal_pending(current)) { + err = sock_intr_errno(timeout); + goto out_wait; + } else if (timeout == 0) { + err = -EAGAIN; + goto out_wait; + } + + prepare_to_wait(sk_sleep(sk), &wait, + TASK_INTERRUPTIBLE); + } + + /* These checks occur both as part of and after the loop + * conditional since we need to check before and after + * sleeping. + */ + if (sk->sk_err) { + err = -sk->sk_err; + goto out_wait; + } else if ((sk->sk_shutdown & SEND_SHUTDOWN) || + (vsk->peer_shutdown & RCV_SHUTDOWN)) { + err = -EPIPE; + goto out_wait; + } + + err = transport->notify_send_pre_enqueue(vsk, &send_data); + if (err < 0) + goto out_wait; + + /* Note that enqueue will only write as many bytes as are free + * in the produce queue, so we don't need to ensure len is + * smaller than the queue size. It is the caller's + * responsibility to check how many bytes we were able to send. + */ + + written = transport->stream_enqueue( + vsk, msg->msg_iov, + len - total_written); + if (written < 0) { + err = -ENOMEM; + goto out_wait; + } + + total_written += written; + + err = transport->notify_send_post_enqueue( + vsk, written, &send_data); + if (err < 0) + goto out_wait; + + } + +out_wait: + if (total_written > 0) + err = total_written; + finish_wait(sk_sleep(sk), &wait); +out: + release_sock(sk); + return err; +} + + +static int +vsock_stream_recvmsg(struct kiocb *kiocb, + struct socket *sock, + struct msghdr *msg, size_t len, int flags) +{ + struct sock *sk; + struct vsock_sock *vsk; + int err; + size_t target; + ssize_t copied; + long timeout; + struct vsock_transport_recv_notify_data recv_data; + + DEFINE_WAIT(wait); + + sk = sock->sk; + vsk = vsock_sk(sk); + err = 0; + + lock_sock(sk); + + if (sk->sk_state != SS_CONNECTED) { + /* Recvmsg is supposed to return 0 if a peer performs an + * orderly shutdown. Differentiate between that case and when a + * peer has not connected or a local shutdown occured with the + * SOCK_DONE flag. + */ + if (sock_flag(sk, SOCK_DONE)) + err = 0; + else + err = -ENOTCONN; + + goto out; + } + + if (flags & MSG_OOB) { + err = -EOPNOTSUPP; + goto out; + } + + /* We don't check peer_shutdown flag here since peer may actually shut + * down, but there can be data in the queue that a local socket can + * receive. + */ + if (sk->sk_shutdown & RCV_SHUTDOWN) { + err = 0; + goto out; + } + + /* It is valid on Linux to pass in a zero-length receive buffer. This + * is not an error. We may as well bail out now. + */ + if (!len) { + err = 0; + goto out; + } + + /* We must not copy less than target bytes into the user's buffer + * before returning successfully, so we wait for the consume queue to + * have that much data to consume before dequeueing. Note that this + * makes it impossible to handle cases where target is greater than the + * queue size. + */ + target = sock_rcvlowat(sk, flags & MSG_WAITALL, len); + if (target >= transport->stream_rcvhiwat(vsk)) { + err = -ENOMEM; + goto out; + } + timeout = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); + copied = 0; + + err = transport->notify_recv_init(vsk, target, &recv_data); + if (err < 0) + goto out; + + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); + + while (1) { + s64 ready = vsock_stream_has_data(vsk); + + if (ready < 0) { + /* Invalid queue pair content. XXX This should be + * changed to a connection reset in a later change. + */ + + err = -ENOMEM; + goto out_wait; + } else if (ready > 0) { + ssize_t read; + + err = transport->notify_recv_pre_dequeue( + vsk, target, &recv_data); + if (err < 0) + break; + + read = transport->stream_dequeue( + vsk, msg->msg_iov, + len - copied, flags); + if (read < 0) { + err = -ENOMEM; + break; + } + + copied += read; + + err = transport->notify_recv_post_dequeue( + vsk, target, read, + !(flags & MSG_PEEK), &recv_data); + if (err < 0) + goto out_wait; + + if (read >= target || flags & MSG_PEEK) + break; + + target -= read; + } else { + if (sk->sk_err != 0 || (sk->sk_shutdown & RCV_SHUTDOWN) + || (vsk->peer_shutdown & SEND_SHUTDOWN)) { + break; + } + /* Don't wait for non-blocking sockets. */ + if (timeout == 0) { + err = -EAGAIN; + break; + } + + err = transport->notify_recv_pre_block( + vsk, target, &recv_data); + if (err < 0) + break; + + release_sock(sk); + timeout = schedule_timeout(timeout); + lock_sock(sk); + + if (signal_pending(current)) { + err = sock_intr_errno(timeout); + break; + } else if (timeout == 0) { + err = -EAGAIN; + break; + } + + prepare_to_wait(sk_sleep(sk), &wait, + TASK_INTERRUPTIBLE); + } + } + + if (sk->sk_err) + err = -sk->sk_err; + else if (sk->sk_shutdown & RCV_SHUTDOWN) + err = 0; + + if (copied > 0) { + /* We only do these additional bookkeeping/notification steps + * if we actually copied something out of the queue pair + * instead of just peeking ahead. + */ + + if (!(flags & MSG_PEEK)) { + /* If the other side has shutdown for sending and there + * is nothing more to read, then modify the socket + * state. + */ + if (vsk->peer_shutdown & SEND_SHUTDOWN) { + if (vsock_stream_has_data(vsk) <= 0) { + sk->sk_state = SS_UNCONNECTED; + sock_set_flag(sk, SOCK_DONE); + sk->sk_state_change(sk); + } + } + } + err = copied; + } + +out_wait: + finish_wait(sk_sleep(sk), &wait); +out: + release_sock(sk); + return err; +} + +static const struct proto_ops vsock_stream_ops = { + .family = PF_VSOCK, + .owner = THIS_MODULE, + .release = vsock_release, + .bind = vsock_bind, + .connect = vsock_stream_connect, + .socketpair = sock_no_socketpair, + .accept = vsock_accept, + .getname = vsock_getname, + .poll = vsock_poll, + .ioctl = sock_no_ioctl, + .listen = vsock_listen, + .shutdown = vsock_shutdown, + .setsockopt = vsock_stream_setsockopt, + .getsockopt = vsock_stream_getsockopt, + .sendmsg = vsock_stream_sendmsg, + .recvmsg = vsock_stream_recvmsg, + .mmap = sock_no_mmap, + .sendpage = sock_no_sendpage, +}; + +static int vsock_create(struct net *net, struct socket *sock, + int protocol, int kern) +{ + if (!sock) + return -EINVAL; + + if (protocol) + return -EPROTONOSUPPORT; + + switch (sock->type) { + case SOCK_DGRAM: + sock->ops = &vsock_dgram_ops; + break; + case SOCK_STREAM: + sock->ops = &vsock_stream_ops; + break; + default: + return -ESOCKTNOSUPPORT; + } + + sock->state = SS_UNCONNECTED; + + return __vsock_create(net, sock, NULL, GFP_KERNEL, 0) ? 0 : -ENOMEM; +} + +static const struct net_proto_family vsock_family_ops = { + .family = AF_VSOCK, + .create = vsock_create, + .owner = THIS_MODULE, +}; + +static long vsock_dev_do_ioctl(struct file *filp, + unsigned int cmd, void __user *ptr) +{ + u32 __user *p = ptr; + int retval = 0; + + switch (cmd) { + case IOCTL_VM_SOCKETS_GET_LOCAL_CID: + if (put_user(transport->get_local_cid(), p) != 0) + retval = -EFAULT; + break; + + default: + pr_err("Unknown ioctl %d\n", cmd); + retval = -EINVAL; + } + + return retval; +} + +static long vsock_dev_ioctl(struct file *filp, + unsigned int cmd, unsigned long arg) +{ + return vsock_dev_do_ioctl(filp, cmd, (void __user *)arg); +} + +#ifdef CONFIG_COMPAT +static long vsock_dev_compat_ioctl(struct file *filp, + unsigned int cmd, unsigned long arg) +{ + return vsock_dev_do_ioctl(filp, cmd, compat_ptr(arg)); +} +#endif + +static const struct file_operations vsock_device_ops = { + .owner = THIS_MODULE, + .unlocked_ioctl = vsock_dev_ioctl, +#ifdef CONFIG_COMPAT + .compat_ioctl = vsock_dev_compat_ioctl, +#endif + .open = nonseekable_open, +}; + +static struct miscdevice vsock_device = { + .name = "vsock", + .minor = MISC_DYNAMIC_MINOR, + .fops = &vsock_device_ops, +}; + +static int __vsock_core_init(void) +{ + int err; + + vsock_init_tables(); + + err = misc_register(&vsock_device); + if (err) { + pr_err("Failed to register misc device\n"); + return -ENOENT; + } + + err = proto_register(&vsock_proto, 1); /* we want our slab */ + if (err) { + pr_err("Cannot register vsock protocol\n"); + goto err_misc_deregister; + } + + err = sock_register(&vsock_family_ops); + if (err) { + pr_err("could not register af_vsock (%d) address family: %d\n", + AF_VSOCK, err); + goto err_unregister_proto; + } + + return 0; + +err_unregister_proto: + proto_unregister(&vsock_proto); +err_misc_deregister: + misc_deregister(&vsock_device); + return err; +} + +int vsock_core_init(const struct vsock_transport *t) +{ + int retval = mutex_lock_interruptible(&vsock_register_mutex); + if (retval) + return retval; + + if (transport) { + retval = -EBUSY; + goto out; + } + + transport = t; + retval = __vsock_core_init(); + if (retval) + transport = NULL; + +out: + mutex_unlock(&vsock_register_mutex); + return retval; +} +EXPORT_SYMBOL_GPL(vsock_core_init); + +void vsock_core_exit(void) +{ + mutex_lock(&vsock_register_mutex); + + misc_deregister(&vsock_device); + sock_unregister(AF_VSOCK); + proto_unregister(&vsock_proto); + + /* We do not want the assignment below re-ordered. */ + mb(); + transport = NULL; + + mutex_unlock(&vsock_register_mutex); +} +EXPORT_SYMBOL_GPL(vsock_core_exit); + +MODULE_AUTHOR("VMware, Inc."); +MODULE_DESCRIPTION("VMware Virtual Socket Family"); +MODULE_VERSION(VSOCK_DRIVER_VERSION_STRING); +MODULE_LICENSE("GPL v2"); diff --git a/net/vmw_vsock/af_vsock.h b/net/vmw_vsock/af_vsock.h new file mode 100644 index 000000000000..7d64d3609ec9 --- /dev/null +++ b/net/vmw_vsock/af_vsock.h @@ -0,0 +1,175 @@ +/* + * VMware vSockets Driver + * + * Copyright (C) 2007-2013 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#ifndef __AF_VSOCK_H__ +#define __AF_VSOCK_H__ + +#include +#include +#include + +#include "vsock_addr.h" + +#define LAST_RESERVED_PORT 1023 + +#define vsock_sk(__sk) ((struct vsock_sock *)__sk) +#define sk_vsock(__vsk) (&(__vsk)->sk) + +struct vsock_sock { + /* sk must be the first member. */ + struct sock sk; + struct sockaddr_vm local_addr; + struct sockaddr_vm remote_addr; + /* Links for the global tables of bound and connected sockets. */ + struct list_head bound_table; + struct list_head connected_table; + /* Accessed without the socket lock held. This means it can never be + * modified outsided of socket create or destruct. + */ + bool trusted; + bool cached_peer_allow_dgram; /* Dgram communication allowed to + * cached peer? + */ + u32 cached_peer; /* Context ID of last dgram destination check. */ + const struct cred *owner; + /* Rest are SOCK_STREAM only. */ + long connect_timeout; + /* Listening socket that this came from. */ + struct sock *listener; + /* Used for pending list and accept queue during connection handshake. + * The listening socket is the head for both lists. Sockets created + * for connection requests are placed in the pending list until they + * are connected, at which point they are put in the accept queue list + * so they can be accepted in accept(). If accept() cannot accept the + * connection, it is marked as rejected so the cleanup function knows + * to clean up the socket. + */ + struct list_head pending_links; + struct list_head accept_queue; + bool rejected; + struct delayed_work dwork; + u32 peer_shutdown; + bool sent_request; + bool ignore_connecting_rst; + + /* Private to transport. */ + void *trans; +}; + +s64 vsock_stream_has_data(struct vsock_sock *vsk); +s64 vsock_stream_has_space(struct vsock_sock *vsk); +void vsock_pending_work(struct work_struct *work); +struct sock *__vsock_create(struct net *net, + struct socket *sock, + struct sock *parent, + gfp_t priority, unsigned short type); + +/**** TRANSPORT ****/ + +struct vsock_transport_recv_notify_data { + u64 data1; /* Transport-defined. */ + u64 data2; /* Transport-defined. */ + bool notify_on_block; +}; + +struct vsock_transport_send_notify_data { + u64 data1; /* Transport-defined. */ + u64 data2; /* Transport-defined. */ +}; + +struct vsock_transport { + /* Initialize/tear-down socket. */ + int (*init)(struct vsock_sock *, struct vsock_sock *); + void (*destruct)(struct vsock_sock *); + void (*release)(struct vsock_sock *); + + /* Connections. */ + int (*connect)(struct vsock_sock *); + + /* DGRAM. */ + int (*dgram_bind)(struct vsock_sock *, struct sockaddr_vm *); + int (*dgram_dequeue)(struct kiocb *kiocb, struct vsock_sock *vsk, + struct msghdr *msg, size_t len, int flags); + int (*dgram_enqueue)(struct vsock_sock *, struct sockaddr_vm *, + struct iovec *, size_t len); + bool (*dgram_allow)(u32 cid, u32 port); + + /* STREAM. */ + /* TODO: stream_bind() */ + ssize_t (*stream_dequeue)(struct vsock_sock *, struct iovec *, + size_t len, int flags); + ssize_t (*stream_enqueue)(struct vsock_sock *, struct iovec *, + size_t len); + s64 (*stream_has_data)(struct vsock_sock *); + s64 (*stream_has_space)(struct vsock_sock *); + u64 (*stream_rcvhiwat)(struct vsock_sock *); + bool (*stream_is_active)(struct vsock_sock *); + bool (*stream_allow)(u32 cid, u32 port); + + /* Notification. */ + int (*notify_poll_in)(struct vsock_sock *, size_t, bool *); + int (*notify_poll_out)(struct vsock_sock *, size_t, bool *); + int (*notify_recv_init)(struct vsock_sock *, size_t, + struct vsock_transport_recv_notify_data *); + int (*notify_recv_pre_block)(struct vsock_sock *, size_t, + struct vsock_transport_recv_notify_data *); + int (*notify_recv_pre_dequeue)(struct vsock_sock *, size_t, + struct vsock_transport_recv_notify_data *); + int (*notify_recv_post_dequeue)(struct vsock_sock *, size_t, + ssize_t, bool, struct vsock_transport_recv_notify_data *); + int (*notify_send_init)(struct vsock_sock *, + struct vsock_transport_send_notify_data *); + int (*notify_send_pre_block)(struct vsock_sock *, + struct vsock_transport_send_notify_data *); + int (*notify_send_pre_enqueue)(struct vsock_sock *, + struct vsock_transport_send_notify_data *); + int (*notify_send_post_enqueue)(struct vsock_sock *, ssize_t, + struct vsock_transport_send_notify_data *); + + /* Shutdown. */ + int (*shutdown)(struct vsock_sock *, int); + + /* Buffer sizes. */ + void (*set_buffer_size)(struct vsock_sock *, u64); + void (*set_min_buffer_size)(struct vsock_sock *, u64); + void (*set_max_buffer_size)(struct vsock_sock *, u64); + u64 (*get_buffer_size)(struct vsock_sock *); + u64 (*get_min_buffer_size)(struct vsock_sock *); + u64 (*get_max_buffer_size)(struct vsock_sock *); + + /* Addressing. */ + u32 (*get_local_cid)(void); +}; + +/**** CORE ****/ + +int vsock_core_init(const struct vsock_transport *t); +void vsock_core_exit(void); + +/**** UTILS ****/ + +void vsock_release_pending(struct sock *pending); +void vsock_add_pending(struct sock *listener, struct sock *pending); +void vsock_remove_pending(struct sock *listener, struct sock *pending); +void vsock_enqueue_accept(struct sock *listener, struct sock *connected); +void vsock_insert_connected(struct vsock_sock *vsk); +void vsock_remove_bound(struct vsock_sock *vsk); +void vsock_remove_connected(struct vsock_sock *vsk); +struct sock *vsock_find_bound_socket(struct sockaddr_vm *addr); +struct sock *vsock_find_connected_socket(struct sockaddr_vm *src, + struct sockaddr_vm *dst); +void vsock_for_each_connected_socket(void (*fn)(struct sock *sk)); + +#endif /* __AF_VSOCK_H__ */ diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c new file mode 100644 index 000000000000..e8a87cf37072 --- /dev/null +++ b/net/vmw_vsock/vmci_transport.c @@ -0,0 +1,2157 @@ +/* + * VMware vSockets Driver + * + * Copyright (C) 2007-2013 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#include + +#define EXPORT_SYMTAB +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "af_vsock.h" +#include "vmci_transport_notify.h" + +static int vmci_transport_recv_dgram_cb(void *data, struct vmci_datagram *dg); +static int vmci_transport_recv_stream_cb(void *data, struct vmci_datagram *dg); +static void vmci_transport_peer_attach_cb(u32 sub_id, + const struct vmci_event_data *ed, + void *client_data); +static void vmci_transport_peer_detach_cb(u32 sub_id, + const struct vmci_event_data *ed, + void *client_data); +static void vmci_transport_recv_pkt_work(struct work_struct *work); +static int vmci_transport_recv_listen(struct sock *sk, + struct vmci_transport_packet *pkt); +static int vmci_transport_recv_connecting_server( + struct sock *sk, + struct sock *pending, + struct vmci_transport_packet *pkt); +static int vmci_transport_recv_connecting_client( + struct sock *sk, + struct vmci_transport_packet *pkt); +static int vmci_transport_recv_connecting_client_negotiate( + struct sock *sk, + struct vmci_transport_packet *pkt); +static int vmci_transport_recv_connecting_client_invalid( + struct sock *sk, + struct vmci_transport_packet *pkt); +static int vmci_transport_recv_connected(struct sock *sk, + struct vmci_transport_packet *pkt); +static bool vmci_transport_old_proto_override(bool *old_pkt_proto); +static u16 vmci_transport_new_proto_supported_versions(void); +static bool vmci_transport_proto_to_notify_struct(struct sock *sk, u16 *proto, + bool old_pkt_proto); + +struct vmci_transport_recv_pkt_info { + struct work_struct work; + struct sock *sk; + struct vmci_transport_packet pkt; +}; + +static struct vmci_handle vmci_transport_stream_handle = { VMCI_INVALID_ID, + VMCI_INVALID_ID }; +static u32 vmci_transport_qp_resumed_sub_id = VMCI_INVALID_ID; + +static int PROTOCOL_OVERRIDE = -1; + +#define VMCI_TRANSPORT_DEFAULT_QP_SIZE_MIN 128 +#define VMCI_TRANSPORT_DEFAULT_QP_SIZE 262144 +#define VMCI_TRANSPORT_DEFAULT_QP_SIZE_MAX 262144 + +/* The default peer timeout indicates how long we will wait for a peer response + * to a control message. + */ +#define VSOCK_DEFAULT_CONNECT_TIMEOUT (2 * HZ) + +#define SS_LISTEN 255 + +/* Helper function to convert from a VMCI error code to a VSock error code. */ + +static s32 vmci_transport_error_to_vsock_error(s32 vmci_error) +{ + int err; + + switch (vmci_error) { + case VMCI_ERROR_NO_MEM: + err = ENOMEM; + break; + case VMCI_ERROR_DUPLICATE_ENTRY: + case VMCI_ERROR_ALREADY_EXISTS: + err = EADDRINUSE; + break; + case VMCI_ERROR_NO_ACCESS: + err = EPERM; + break; + case VMCI_ERROR_NO_RESOURCES: + err = ENOBUFS; + break; + case VMCI_ERROR_INVALID_RESOURCE: + err = EHOSTUNREACH; + break; + case VMCI_ERROR_INVALID_ARGS: + default: + err = EINVAL; + } + + return err > 0 ? -err : err; +} + +static inline void +vmci_transport_packet_init(struct vmci_transport_packet *pkt, + struct sockaddr_vm *src, + struct sockaddr_vm *dst, + u8 type, + u64 size, + u64 mode, + struct vmci_transport_waiting_info *wait, + u16 proto, + struct vmci_handle handle) +{ + /* We register the stream control handler as an any cid handle so we + * must always send from a source address of VMADDR_CID_ANY + */ + pkt->dg.src = vmci_make_handle(VMADDR_CID_ANY, + VMCI_TRANSPORT_PACKET_RID); + pkt->dg.dst = vmci_make_handle(dst->svm_cid, + VMCI_TRANSPORT_PACKET_RID); + pkt->dg.payload_size = sizeof(*pkt) - sizeof(pkt->dg); + pkt->version = VMCI_TRANSPORT_PACKET_VERSION; + pkt->type = type; + pkt->src_port = src->svm_port; + pkt->dst_port = dst->svm_port; + memset(&pkt->proto, 0, sizeof(pkt->proto)); + memset(&pkt->_reserved2, 0, sizeof(pkt->_reserved2)); + + switch (pkt->type) { + case VMCI_TRANSPORT_PACKET_TYPE_INVALID: + pkt->u.size = 0; + break; + + case VMCI_TRANSPORT_PACKET_TYPE_REQUEST: + case VMCI_TRANSPORT_PACKET_TYPE_NEGOTIATE: + pkt->u.size = size; + break; + + case VMCI_TRANSPORT_PACKET_TYPE_OFFER: + case VMCI_TRANSPORT_PACKET_TYPE_ATTACH: + pkt->u.handle = handle; + break; + + case VMCI_TRANSPORT_PACKET_TYPE_WROTE: + case VMCI_TRANSPORT_PACKET_TYPE_READ: + case VMCI_TRANSPORT_PACKET_TYPE_RST: + pkt->u.size = 0; + break; + + case VMCI_TRANSPORT_PACKET_TYPE_SHUTDOWN: + pkt->u.mode = mode; + break; + + case VMCI_TRANSPORT_PACKET_TYPE_WAITING_READ: + case VMCI_TRANSPORT_PACKET_TYPE_WAITING_WRITE: + memcpy(&pkt->u.wait, wait, sizeof(pkt->u.wait)); + break; + + case VMCI_TRANSPORT_PACKET_TYPE_REQUEST2: + case VMCI_TRANSPORT_PACKET_TYPE_NEGOTIATE2: + pkt->u.size = size; + pkt->proto = proto; + break; + } +} + +static inline void +vmci_transport_packet_get_addresses(struct vmci_transport_packet *pkt, + struct sockaddr_vm *local, + struct sockaddr_vm *remote) +{ + vsock_addr_init(local, pkt->dg.dst.context, pkt->dst_port); + vsock_addr_init(remote, pkt->dg.src.context, pkt->src_port); +} + +static int +__vmci_transport_send_control_pkt(struct vmci_transport_packet *pkt, + struct sockaddr_vm *src, + struct sockaddr_vm *dst, + enum vmci_transport_packet_type type, + u64 size, + u64 mode, + struct vmci_transport_waiting_info *wait, + u16 proto, + struct vmci_handle handle, + bool convert_error) +{ + int err; + + vmci_transport_packet_init(pkt, src, dst, type, size, mode, wait, + proto, handle); + err = vmci_datagram_send(&pkt->dg); + if (convert_error && (err < 0)) + return vmci_transport_error_to_vsock_error(err); + + return err; +} + +static int +vmci_transport_reply_control_pkt_fast(struct vmci_transport_packet *pkt, + enum vmci_transport_packet_type type, + u64 size, + u64 mode, + struct vmci_transport_waiting_info *wait, + struct vmci_handle handle) +{ + struct vmci_transport_packet reply; + struct sockaddr_vm src, dst; + + if (pkt->type == VMCI_TRANSPORT_PACKET_TYPE_RST) { + return 0; + } else { + vmci_transport_packet_get_addresses(pkt, &src, &dst); + return __vmci_transport_send_control_pkt(&reply, &src, &dst, + type, + size, mode, wait, + VSOCK_PROTO_INVALID, + handle, true); + } +} + +static int +vmci_transport_send_control_pkt_bh(struct sockaddr_vm *src, + struct sockaddr_vm *dst, + enum vmci_transport_packet_type type, + u64 size, + u64 mode, + struct vmci_transport_waiting_info *wait, + struct vmci_handle handle) +{ + /* Note that it is safe to use a single packet across all CPUs since + * two tasklets of the same type are guaranteed to not ever run + * simultaneously. If that ever changes, or VMCI stops using tasklets, + * we can use per-cpu packets. + */ + static struct vmci_transport_packet pkt; + + return __vmci_transport_send_control_pkt(&pkt, src, dst, type, + size, mode, wait, + VSOCK_PROTO_INVALID, handle, + false); +} + +static int +vmci_transport_send_control_pkt(struct sock *sk, + enum vmci_transport_packet_type type, + u64 size, + u64 mode, + struct vmci_transport_waiting_info *wait, + u16 proto, + struct vmci_handle handle) +{ + struct vmci_transport_packet *pkt; + struct vsock_sock *vsk; + int err; + + vsk = vsock_sk(sk); + + if (!vsock_addr_bound(&vsk->local_addr)) + return -EINVAL; + + if (!vsock_addr_bound(&vsk->remote_addr)) + return -EINVAL; + + pkt = kmalloc(sizeof(*pkt), GFP_KERNEL); + if (!pkt) + return -ENOMEM; + + err = __vmci_transport_send_control_pkt(pkt, &vsk->local_addr, + &vsk->remote_addr, type, size, + mode, wait, proto, handle, + true); + kfree(pkt); + + return err; +} + +static int vmci_transport_send_reset_bh(struct sockaddr_vm *dst, + struct sockaddr_vm *src, + struct vmci_transport_packet *pkt) +{ + if (pkt->type == VMCI_TRANSPORT_PACKET_TYPE_RST) + return 0; + return vmci_transport_send_control_pkt_bh( + dst, src, + VMCI_TRANSPORT_PACKET_TYPE_RST, 0, + 0, NULL, VMCI_INVALID_HANDLE); +} + +static int vmci_transport_send_reset(struct sock *sk, + struct vmci_transport_packet *pkt) +{ + if (pkt->type == VMCI_TRANSPORT_PACKET_TYPE_RST) + return 0; + return vmci_transport_send_control_pkt(sk, + VMCI_TRANSPORT_PACKET_TYPE_RST, + 0, 0, NULL, VSOCK_PROTO_INVALID, + VMCI_INVALID_HANDLE); +} + +static int vmci_transport_send_negotiate(struct sock *sk, size_t size) +{ + return vmci_transport_send_control_pkt( + sk, + VMCI_TRANSPORT_PACKET_TYPE_NEGOTIATE, + size, 0, NULL, + VSOCK_PROTO_INVALID, + VMCI_INVALID_HANDLE); +} + +static int vmci_transport_send_negotiate2(struct sock *sk, size_t size, + u16 version) +{ + return vmci_transport_send_control_pkt( + sk, + VMCI_TRANSPORT_PACKET_TYPE_NEGOTIATE2, + size, 0, NULL, version, + VMCI_INVALID_HANDLE); +} + +static int vmci_transport_send_qp_offer(struct sock *sk, + struct vmci_handle handle) +{ + return vmci_transport_send_control_pkt( + sk, VMCI_TRANSPORT_PACKET_TYPE_OFFER, 0, + 0, NULL, + VSOCK_PROTO_INVALID, handle); +} + +static int vmci_transport_send_attach(struct sock *sk, + struct vmci_handle handle) +{ + return vmci_transport_send_control_pkt( + sk, VMCI_TRANSPORT_PACKET_TYPE_ATTACH, + 0, 0, NULL, VSOCK_PROTO_INVALID, + handle); +} + +static int vmci_transport_reply_reset(struct vmci_transport_packet *pkt) +{ + return vmci_transport_reply_control_pkt_fast( + pkt, + VMCI_TRANSPORT_PACKET_TYPE_RST, + 0, 0, NULL, + VMCI_INVALID_HANDLE); +} + +static int vmci_transport_send_invalid_bh(struct sockaddr_vm *dst, + struct sockaddr_vm *src) +{ + return vmci_transport_send_control_pkt_bh( + dst, src, + VMCI_TRANSPORT_PACKET_TYPE_INVALID, + 0, 0, NULL, VMCI_INVALID_HANDLE); +} + +int vmci_transport_send_wrote_bh(struct sockaddr_vm *dst, + struct sockaddr_vm *src) +{ + return vmci_transport_send_control_pkt_bh( + dst, src, + VMCI_TRANSPORT_PACKET_TYPE_WROTE, 0, + 0, NULL, VMCI_INVALID_HANDLE); +} + +int vmci_transport_send_read_bh(struct sockaddr_vm *dst, + struct sockaddr_vm *src) +{ + return vmci_transport_send_control_pkt_bh( + dst, src, + VMCI_TRANSPORT_PACKET_TYPE_READ, 0, + 0, NULL, VMCI_INVALID_HANDLE); +} + +int vmci_transport_send_wrote(struct sock *sk) +{ + return vmci_transport_send_control_pkt( + sk, VMCI_TRANSPORT_PACKET_TYPE_WROTE, 0, + 0, NULL, VSOCK_PROTO_INVALID, + VMCI_INVALID_HANDLE); +} + +int vmci_transport_send_read(struct sock *sk) +{ + return vmci_transport_send_control_pkt( + sk, VMCI_TRANSPORT_PACKET_TYPE_READ, 0, + 0, NULL, VSOCK_PROTO_INVALID, + VMCI_INVALID_HANDLE); +} + +int vmci_transport_send_waiting_write(struct sock *sk, + struct vmci_transport_waiting_info *wait) +{ + return vmci_transport_send_control_pkt( + sk, VMCI_TRANSPORT_PACKET_TYPE_WAITING_WRITE, + 0, 0, wait, VSOCK_PROTO_INVALID, + VMCI_INVALID_HANDLE); +} + +int vmci_transport_send_waiting_read(struct sock *sk, + struct vmci_transport_waiting_info *wait) +{ + return vmci_transport_send_control_pkt( + sk, VMCI_TRANSPORT_PACKET_TYPE_WAITING_READ, + 0, 0, wait, VSOCK_PROTO_INVALID, + VMCI_INVALID_HANDLE); +} + +static int vmci_transport_shutdown(struct vsock_sock *vsk, int mode) +{ + return vmci_transport_send_control_pkt( + &vsk->sk, + VMCI_TRANSPORT_PACKET_TYPE_SHUTDOWN, + 0, mode, NULL, + VSOCK_PROTO_INVALID, + VMCI_INVALID_HANDLE); +} + +static int vmci_transport_send_conn_request(struct sock *sk, size_t size) +{ + return vmci_transport_send_control_pkt(sk, + VMCI_TRANSPORT_PACKET_TYPE_REQUEST, + size, 0, NULL, + VSOCK_PROTO_INVALID, + VMCI_INVALID_HANDLE); +} + +static int vmci_transport_send_conn_request2(struct sock *sk, size_t size, + u16 version) +{ + return vmci_transport_send_control_pkt( + sk, VMCI_TRANSPORT_PACKET_TYPE_REQUEST2, + size, 0, NULL, version, + VMCI_INVALID_HANDLE); +} + +static struct sock *vmci_transport_get_pending( + struct sock *listener, + struct vmci_transport_packet *pkt) +{ + struct vsock_sock *vlistener; + struct vsock_sock *vpending; + struct sock *pending; + + vlistener = vsock_sk(listener); + + list_for_each_entry(vpending, &vlistener->pending_links, + pending_links) { + struct sockaddr_vm src; + struct sockaddr_vm dst; + + vsock_addr_init(&src, pkt->dg.src.context, pkt->src_port); + vsock_addr_init(&dst, pkt->dg.dst.context, pkt->dst_port); + + if (vsock_addr_equals_addr(&src, &vpending->remote_addr) && + vsock_addr_equals_addr(&dst, &vpending->local_addr)) { + pending = sk_vsock(vpending); + sock_hold(pending); + goto found; + } + } + + pending = NULL; +found: + return pending; + +} + +static void vmci_transport_release_pending(struct sock *pending) +{ + sock_put(pending); +} + +/* We allow two kinds of sockets to communicate with a restricted VM: 1) + * trusted sockets 2) sockets from applications running as the same user as the + * VM (this is only true for the host side and only when using hosted products) + */ + +static bool vmci_transport_is_trusted(struct vsock_sock *vsock, u32 peer_cid) +{ + return vsock->trusted || + vmci_is_context_owner(peer_cid, vsock->owner->uid); +} + +/* We allow sending datagrams to and receiving datagrams from a restricted VM + * only if it is trusted as described in vmci_transport_is_trusted. + */ + +static bool vmci_transport_allow_dgram(struct vsock_sock *vsock, u32 peer_cid) +{ + if (vsock->cached_peer != peer_cid) { + vsock->cached_peer = peer_cid; + if (!vmci_transport_is_trusted(vsock, peer_cid) && + (vmci_context_get_priv_flags(peer_cid) & + VMCI_PRIVILEGE_FLAG_RESTRICTED)) { + vsock->cached_peer_allow_dgram = false; + } else { + vsock->cached_peer_allow_dgram = true; + } + } + + return vsock->cached_peer_allow_dgram; +} + +static int +vmci_transport_queue_pair_alloc(struct vmci_qp **qpair, + struct vmci_handle *handle, + u64 produce_size, + u64 consume_size, + u32 peer, u32 flags, bool trusted) +{ + int err = 0; + + if (trusted) { + /* Try to allocate our queue pair as trusted. This will only + * work if vsock is running in the host. + */ + + err = vmci_qpair_alloc(qpair, handle, produce_size, + consume_size, + peer, flags, + VMCI_PRIVILEGE_FLAG_TRUSTED); + if (err != VMCI_ERROR_NO_ACCESS) + goto out; + + } + + err = vmci_qpair_alloc(qpair, handle, produce_size, consume_size, + peer, flags, VMCI_NO_PRIVILEGE_FLAGS); +out: + if (err < 0) { + pr_err("Could not attach to queue pair with %d\n", + err); + err = vmci_transport_error_to_vsock_error(err); + } + + return err; +} + +static int +vmci_transport_datagram_create_hnd(u32 resource_id, + u32 flags, + vmci_datagram_recv_cb recv_cb, + void *client_data, + struct vmci_handle *out_handle) +{ + int err = 0; + + /* Try to allocate our datagram handler as trusted. This will only work + * if vsock is running in the host. + */ + + err = vmci_datagram_create_handle_priv(resource_id, flags, + VMCI_PRIVILEGE_FLAG_TRUSTED, + recv_cb, + client_data, out_handle); + + if (err == VMCI_ERROR_NO_ACCESS) + err = vmci_datagram_create_handle(resource_id, flags, + recv_cb, client_data, + out_handle); + + return err; +} + +/* This is invoked as part of a tasklet that's scheduled when the VMCI + * interrupt fires. This is run in bottom-half context and if it ever needs to + * sleep it should defer that work to a work queue. + */ + +static int vmci_transport_recv_dgram_cb(void *data, struct vmci_datagram *dg) +{ + struct sock *sk; + size_t size; + struct sk_buff *skb; + struct vsock_sock *vsk; + + sk = (struct sock *)data; + + /* This handler is privileged when this module is running on the host. + * We will get datagrams from all endpoints (even VMs that are in a + * restricted context). If we get one from a restricted context then + * the destination socket must be trusted. + * + * NOTE: We access the socket struct without holding the lock here. + * This is ok because the field we are interested is never modified + * outside of the create and destruct socket functions. + */ + vsk = vsock_sk(sk); + if (!vmci_transport_allow_dgram(vsk, dg->src.context)) + return VMCI_ERROR_NO_ACCESS; + + size = VMCI_DG_SIZE(dg); + + /* Attach the packet to the socket's receive queue as an sk_buff. */ + skb = alloc_skb(size, GFP_ATOMIC); + if (skb) { + /* sk_receive_skb() will do a sock_put(), so hold here. */ + sock_hold(sk); + skb_put(skb, size); + memcpy(skb->data, dg, size); + sk_receive_skb(sk, skb, 0); + } + + return VMCI_SUCCESS; +} + +static bool vmci_transport_stream_allow(u32 cid, u32 port) +{ + static const u32 non_socket_contexts[] = { + VMADDR_CID_HYPERVISOR, + VMADDR_CID_RESERVED, + }; + int i; + + BUILD_BUG_ON(sizeof(cid) != sizeof(*non_socket_contexts)); + + for (i = 0; i < ARRAY_SIZE(non_socket_contexts); i++) { + if (cid == non_socket_contexts[i]) + return false; + } + + return true; +} + +/* This is invoked as part of a tasklet that's scheduled when the VMCI + * interrupt fires. This is run in bottom-half context but it defers most of + * its work to the packet handling work queue. + */ + +static int vmci_transport_recv_stream_cb(void *data, struct vmci_datagram *dg) +{ + struct sock *sk; + struct sockaddr_vm dst; + struct sockaddr_vm src; + struct vmci_transport_packet *pkt; + struct vsock_sock *vsk; + bool bh_process_pkt; + int err; + + sk = NULL; + err = VMCI_SUCCESS; + bh_process_pkt = false; + + /* Ignore incoming packets from contexts without sockets, or resources + * that aren't vsock implementations. + */ + + if (!vmci_transport_stream_allow(dg->src.context, -1) + || VMCI_TRANSPORT_PACKET_RID != dg->src.resource) + return VMCI_ERROR_NO_ACCESS; + + if (VMCI_DG_SIZE(dg) < sizeof(*pkt)) + /* Drop datagrams that do not contain full VSock packets. */ + return VMCI_ERROR_INVALID_ARGS; + + pkt = (struct vmci_transport_packet *)dg; + + /* Find the socket that should handle this packet. First we look for a + * connected socket and if there is none we look for a socket bound to + * the destintation address. + */ + vsock_addr_init(&src, pkt->dg.src.context, pkt->src_port); + vsock_addr_init(&dst, pkt->dg.dst.context, pkt->dst_port); + + sk = vsock_find_connected_socket(&src, &dst); + if (!sk) { + sk = vsock_find_bound_socket(&dst); + if (!sk) { + /* We could not find a socket for this specified + * address. If this packet is a RST, we just drop it. + * If it is another packet, we send a RST. Note that + * we do not send a RST reply to RSTs so that we do not + * continually send RSTs between two endpoints. + * + * Note that since this is a reply, dst is src and src + * is dst. + */ + if (vmci_transport_send_reset_bh(&dst, &src, pkt) < 0) + pr_err("unable to send reset\n"); + + err = VMCI_ERROR_NOT_FOUND; + goto out; + } + } + + /* If the received packet type is beyond all types known to this + * implementation, reply with an invalid message. Hopefully this will + * help when implementing backwards compatibility in the future. + */ + if (pkt->type >= VMCI_TRANSPORT_PACKET_TYPE_MAX) { + vmci_transport_send_invalid_bh(&dst, &src); + err = VMCI_ERROR_INVALID_ARGS; + goto out; + } + + /* This handler is privileged when this module is running on the host. + * We will get datagram connect requests from all endpoints (even VMs + * that are in a restricted context). If we get one from a restricted + * context then the destination socket must be trusted. + * + * NOTE: We access the socket struct without holding the lock here. + * This is ok because the field we are interested is never modified + * outside of the create and destruct socket functions. + */ + vsk = vsock_sk(sk); + if (!vmci_transport_allow_dgram(vsk, pkt->dg.src.context)) { + err = VMCI_ERROR_NO_ACCESS; + goto out; + } + + /* We do most everything in a work queue, but let's fast path the + * notification of reads and writes to help data transfer performance. + * We can only do this if there is no process context code executing + * for this socket since that may change the state. + */ + bh_lock_sock(sk); + + if (!sock_owned_by_user(sk) && sk->sk_state == SS_CONNECTED) + vmci_trans(vsk)->notify_ops->handle_notify_pkt( + sk, pkt, true, &dst, &src, + &bh_process_pkt); + + bh_unlock_sock(sk); + + if (!bh_process_pkt) { + struct vmci_transport_recv_pkt_info *recv_pkt_info; + + recv_pkt_info = kmalloc(sizeof(*recv_pkt_info), GFP_ATOMIC); + if (!recv_pkt_info) { + if (vmci_transport_send_reset_bh(&dst, &src, pkt) < 0) + pr_err("unable to send reset\n"); + + err = VMCI_ERROR_NO_MEM; + goto out; + } + + recv_pkt_info->sk = sk; + memcpy(&recv_pkt_info->pkt, pkt, sizeof(recv_pkt_info->pkt)); + INIT_WORK(&recv_pkt_info->work, vmci_transport_recv_pkt_work); + + schedule_work(&recv_pkt_info->work); + /* Clear sk so that the reference count incremented by one of + * the Find functions above is not decremented below. We need + * that reference count for the packet handler we've scheduled + * to run. + */ + sk = NULL; + } + +out: + if (sk) + sock_put(sk); + + return err; +} + +static void vmci_transport_peer_attach_cb(u32 sub_id, + const struct vmci_event_data *e_data, + void *client_data) +{ + struct sock *sk = client_data; + const struct vmci_event_payload_qp *e_payload; + struct vsock_sock *vsk; + + e_payload = vmci_event_data_const_payload(e_data); + + vsk = vsock_sk(sk); + + /* We don't ask for delayed CBs when we subscribe to this event (we + * pass 0 as flags to vmci_event_subscribe()). VMCI makes no + * guarantees in that case about what context we might be running in, + * so it could be BH or process, blockable or non-blockable. So we + * need to account for all possible contexts here. + */ + local_bh_disable(); + bh_lock_sock(sk); + + /* XXX This is lame, we should provide a way to lookup sockets by + * qp_handle. + */ + if (vmci_handle_is_equal(vmci_trans(vsk)->qp_handle, + e_payload->handle)) { + /* XXX This doesn't do anything, but in the future we may want + * to set a flag here to verify the attach really did occur and + * we weren't just sent a datagram claiming it was. + */ + goto out; + } + +out: + bh_unlock_sock(sk); + local_bh_enable(); +} + +static void vmci_transport_handle_detach(struct sock *sk) +{ + struct vsock_sock *vsk; + + vsk = vsock_sk(sk); + if (!vmci_handle_is_invalid(vmci_trans(vsk)->qp_handle)) { + sock_set_flag(sk, SOCK_DONE); + + /* On a detach the peer will not be sending or receiving + * anymore. + */ + vsk->peer_shutdown = SHUTDOWN_MASK; + + /* We should not be sending anymore since the peer won't be + * there to receive, but we can still receive if there is data + * left in our consume queue. + */ + if (vsock_stream_has_data(vsk) <= 0) { + if (sk->sk_state == SS_CONNECTING) { + /* The peer may detach from a queue pair while + * we are still in the connecting state, i.e., + * if the peer VM is killed after attaching to + * a queue pair, but before we complete the + * handshake. In that case, we treat the detach + * event like a reset. + */ + + sk->sk_state = SS_UNCONNECTED; + sk->sk_err = ECONNRESET; + sk->sk_error_report(sk); + return; + } + sk->sk_state = SS_UNCONNECTED; + } + sk->sk_state_change(sk); + } +} + +static void vmci_transport_peer_detach_cb(u32 sub_id, + const struct vmci_event_data *e_data, + void *client_data) +{ + struct sock *sk = client_data; + const struct vmci_event_payload_qp *e_payload; + struct vsock_sock *vsk; + + e_payload = vmci_event_data_const_payload(e_data); + vsk = vsock_sk(sk); + if (vmci_handle_is_invalid(e_payload->handle)) + return; + + /* Same rules for locking as for peer_attach_cb(). */ + local_bh_disable(); + bh_lock_sock(sk); + + /* XXX This is lame, we should provide a way to lookup sockets by + * qp_handle. + */ + if (vmci_handle_is_equal(vmci_trans(vsk)->qp_handle, + e_payload->handle)) + vmci_transport_handle_detach(sk); + + bh_unlock_sock(sk); + local_bh_enable(); +} + +static void vmci_transport_qp_resumed_cb(u32 sub_id, + const struct vmci_event_data *e_data, + void *client_data) +{ + vsock_for_each_connected_socket(vmci_transport_handle_detach); +} + +static void vmci_transport_recv_pkt_work(struct work_struct *work) +{ + struct vmci_transport_recv_pkt_info *recv_pkt_info; + struct vmci_transport_packet *pkt; + struct sock *sk; + + recv_pkt_info = + container_of(work, struct vmci_transport_recv_pkt_info, work); + sk = recv_pkt_info->sk; + pkt = &recv_pkt_info->pkt; + + lock_sock(sk); + + switch (sk->sk_state) { + case SS_LISTEN: + vmci_transport_recv_listen(sk, pkt); + break; + case SS_CONNECTING: + /* Processing of pending connections for servers goes through + * the listening socket, so see vmci_transport_recv_listen() + * for that path. + */ + vmci_transport_recv_connecting_client(sk, pkt); + break; + case SS_CONNECTED: + vmci_transport_recv_connected(sk, pkt); + break; + default: + /* Because this function does not run in the same context as + * vmci_transport_recv_stream_cb it is possible that the + * socket has closed. We need to let the other side know or it + * could be sitting in a connect and hang forever. Send a + * reset to prevent that. + */ + vmci_transport_send_reset(sk, pkt); + goto out; + } + +out: + release_sock(sk); + kfree(recv_pkt_info); + /* Release reference obtained in the stream callback when we fetched + * this socket out of the bound or connected list. + */ + sock_put(sk); +} + +static int vmci_transport_recv_listen(struct sock *sk, + struct vmci_transport_packet *pkt) +{ + struct sock *pending; + struct vsock_sock *vpending; + int err; + u64 qp_size; + bool old_request = false; + bool old_pkt_proto = false; + + err = 0; + + /* Because we are in the listen state, we could be receiving a packet + * for ourself or any previous connection requests that we received. + * If it's the latter, we try to find a socket in our list of pending + * connections and, if we do, call the appropriate handler for the + * state that that socket is in. Otherwise we try to service the + * connection request. + */ + pending = vmci_transport_get_pending(sk, pkt); + if (pending) { + lock_sock(pending); + switch (pending->sk_state) { + case SS_CONNECTING: + err = vmci_transport_recv_connecting_server(sk, + pending, + pkt); + break; + default: + vmci_transport_send_reset(pending, pkt); + err = -EINVAL; + } + + if (err < 0) + vsock_remove_pending(sk, pending); + + release_sock(pending); + vmci_transport_release_pending(pending); + + return err; + } + + /* The listen state only accepts connection requests. Reply with a + * reset unless we received a reset. + */ + + if (!(pkt->type == VMCI_TRANSPORT_PACKET_TYPE_REQUEST || + pkt->type == VMCI_TRANSPORT_PACKET_TYPE_REQUEST2)) { + vmci_transport_reply_reset(pkt); + return -EINVAL; + } + + if (pkt->u.size == 0) { + vmci_transport_reply_reset(pkt); + return -EINVAL; + } + + /* If this socket can't accommodate this connection request, we send a + * reset. Otherwise we create and initialize a child socket and reply + * with a connection negotiation. + */ + if (sk->sk_ack_backlog >= sk->sk_max_ack_backlog) { + vmci_transport_reply_reset(pkt); + return -ECONNREFUSED; + } + + pending = __vsock_create(sock_net(sk), NULL, sk, GFP_KERNEL, + sk->sk_type); + if (!pending) { + vmci_transport_send_reset(sk, pkt); + return -ENOMEM; + } + + vpending = vsock_sk(pending); + + vsock_addr_init(&vpending->local_addr, pkt->dg.dst.context, + pkt->dst_port); + vsock_addr_init(&vpending->remote_addr, pkt->dg.src.context, + pkt->src_port); + + /* If the proposed size fits within our min/max, accept it. Otherwise + * propose our own size. + */ + if (pkt->u.size >= vmci_trans(vpending)->queue_pair_min_size && + pkt->u.size <= vmci_trans(vpending)->queue_pair_max_size) { + qp_size = pkt->u.size; + } else { + qp_size = vmci_trans(vpending)->queue_pair_size; + } + + /* Figure out if we are using old or new requests based on the + * overrides pkt types sent by our peer. + */ + if (vmci_transport_old_proto_override(&old_pkt_proto)) { + old_request = old_pkt_proto; + } else { + if (pkt->type == VMCI_TRANSPORT_PACKET_TYPE_REQUEST) + old_request = true; + else if (pkt->type == VMCI_TRANSPORT_PACKET_TYPE_REQUEST2) + old_request = false; + + } + + if (old_request) { + /* Handle a REQUEST (or override) */ + u16 version = VSOCK_PROTO_INVALID; + if (vmci_transport_proto_to_notify_struct( + pending, &version, true)) + err = vmci_transport_send_negotiate(pending, qp_size); + else + err = -EINVAL; + + } else { + /* Handle a REQUEST2 (or override) */ + int proto_int = pkt->proto; + int pos; + u16 active_proto_version = 0; + + /* The list of possible protocols is the intersection of all + * protocols the client supports ... plus all the protocols we + * support. + */ + proto_int &= vmci_transport_new_proto_supported_versions(); + + /* We choose the highest possible protocol version and use that + * one. + */ + pos = fls(proto_int); + if (pos) { + active_proto_version = (1 << (pos - 1)); + if (vmci_transport_proto_to_notify_struct( + pending, &active_proto_version, false)) + err = vmci_transport_send_negotiate2(pending, + qp_size, + active_proto_version); + else + err = -EINVAL; + + } else { + err = -EINVAL; + } + } + + if (err < 0) { + vmci_transport_send_reset(sk, pkt); + sock_put(pending); + err = vmci_transport_error_to_vsock_error(err); + goto out; + } + + vsock_add_pending(sk, pending); + sk->sk_ack_backlog++; + + pending->sk_state = SS_CONNECTING; + vmci_trans(vpending)->produce_size = + vmci_trans(vpending)->consume_size = qp_size; + vmci_trans(vpending)->queue_pair_size = qp_size; + + vmci_trans(vpending)->notify_ops->process_request(pending); + + /* We might never receive another message for this socket and it's not + * connected to any process, so we have to ensure it gets cleaned up + * ourself. Our delayed work function will take care of that. Note + * that we do not ever cancel this function since we have few + * guarantees about its state when calling cancel_delayed_work(). + * Instead we hold a reference on the socket for that function and make + * it capable of handling cases where it needs to do nothing but + * release that reference. + */ + vpending->listener = sk; + sock_hold(sk); + sock_hold(pending); + INIT_DELAYED_WORK(&vpending->dwork, vsock_pending_work); + schedule_delayed_work(&vpending->dwork, HZ); + +out: + return err; +} + +static int +vmci_transport_recv_connecting_server(struct sock *listener, + struct sock *pending, + struct vmci_transport_packet *pkt) +{ + struct vsock_sock *vpending; + struct vmci_handle handle; + struct vmci_qp *qpair; + bool is_local; + u32 flags; + u32 detach_sub_id; + int err; + int skerr; + + vpending = vsock_sk(pending); + detach_sub_id = VMCI_INVALID_ID; + + switch (pkt->type) { + case VMCI_TRANSPORT_PACKET_TYPE_OFFER: + if (vmci_handle_is_invalid(pkt->u.handle)) { + vmci_transport_send_reset(pending, pkt); + skerr = EPROTO; + err = -EINVAL; + goto destroy; + } + break; + default: + /* Close and cleanup the connection. */ + vmci_transport_send_reset(pending, pkt); + skerr = EPROTO; + err = pkt->type == VMCI_TRANSPORT_PACKET_TYPE_RST ? 0 : -EINVAL; + goto destroy; + } + + /* In order to complete the connection we need to attach to the offered + * queue pair and send an attach notification. We also subscribe to the + * detach event so we know when our peer goes away, and we do that + * before attaching so we don't miss an event. If all this succeeds, + * we update our state and wakeup anything waiting in accept() for a + * connection. + */ + + /* We don't care about attach since we ensure the other side has + * attached by specifying the ATTACH_ONLY flag below. + */ + err = vmci_event_subscribe(VMCI_EVENT_QP_PEER_DETACH, + vmci_transport_peer_detach_cb, + pending, &detach_sub_id); + if (err < VMCI_SUCCESS) { + vmci_transport_send_reset(pending, pkt); + err = vmci_transport_error_to_vsock_error(err); + skerr = -err; + goto destroy; + } + + vmci_trans(vpending)->detach_sub_id = detach_sub_id; + + /* Now attach to the queue pair the client created. */ + handle = pkt->u.handle; + + /* vpending->local_addr always has a context id so we do not need to + * worry about VMADDR_CID_ANY in this case. + */ + is_local = + vpending->remote_addr.svm_cid == vpending->local_addr.svm_cid; + flags = VMCI_QPFLAG_ATTACH_ONLY; + flags |= is_local ? VMCI_QPFLAG_LOCAL : 0; + + err = vmci_transport_queue_pair_alloc( + &qpair, + &handle, + vmci_trans(vpending)->produce_size, + vmci_trans(vpending)->consume_size, + pkt->dg.src.context, + flags, + vmci_transport_is_trusted( + vpending, + vpending->remote_addr.svm_cid)); + if (err < 0) { + vmci_transport_send_reset(pending, pkt); + skerr = -err; + goto destroy; + } + + vmci_trans(vpending)->qp_handle = handle; + vmci_trans(vpending)->qpair = qpair; + + /* When we send the attach message, we must be ready to handle incoming + * control messages on the newly connected socket. So we move the + * pending socket to the connected state before sending the attach + * message. Otherwise, an incoming packet triggered by the attach being + * received by the peer may be processed concurrently with what happens + * below after sending the attach message, and that incoming packet + * will find the listening socket instead of the (currently) pending + * socket. Note that enqueueing the socket increments the reference + * count, so even if a reset comes before the connection is accepted, + * the socket will be valid until it is removed from the queue. + * + * If we fail sending the attach below, we remove the socket from the + * connected list and move the socket to SS_UNCONNECTED before + * releasing the lock, so a pending slow path processing of an incoming + * packet will not see the socket in the connected state in that case. + */ + pending->sk_state = SS_CONNECTED; + + vsock_insert_connected(vpending); + + /* Notify our peer of our attach. */ + err = vmci_transport_send_attach(pending, handle); + if (err < 0) { + vsock_remove_connected(vpending); + pr_err("Could not send attach\n"); + vmci_transport_send_reset(pending, pkt); + err = vmci_transport_error_to_vsock_error(err); + skerr = -err; + goto destroy; + } + + /* We have a connection. Move the now connected socket from the + * listener's pending list to the accept queue so callers of accept() + * can find it. + */ + vsock_remove_pending(listener, pending); + vsock_enqueue_accept(listener, pending); + + /* Callers of accept() will be be waiting on the listening socket, not + * the pending socket. + */ + listener->sk_state_change(listener); + + return 0; + +destroy: + pending->sk_err = skerr; + pending->sk_state = SS_UNCONNECTED; + /* As long as we drop our reference, all necessary cleanup will handle + * when the cleanup function drops its reference and our destruct + * implementation is called. Note that since the listen handler will + * remove pending from the pending list upon our failure, the cleanup + * function won't drop the additional reference, which is why we do it + * here. + */ + sock_put(pending); + + return err; +} + +static int +vmci_transport_recv_connecting_client(struct sock *sk, + struct vmci_transport_packet *pkt) +{ + struct vsock_sock *vsk; + int err; + int skerr; + + vsk = vsock_sk(sk); + + switch (pkt->type) { + case VMCI_TRANSPORT_PACKET_TYPE_ATTACH: + if (vmci_handle_is_invalid(pkt->u.handle) || + !vmci_handle_is_equal(pkt->u.handle, + vmci_trans(vsk)->qp_handle)) { + skerr = EPROTO; + err = -EINVAL; + goto destroy; + } + + /* Signify the socket is connected and wakeup the waiter in + * connect(). Also place the socket in the connected table for + * accounting (it can already be found since it's in the bound + * table). + */ + sk->sk_state = SS_CONNECTED; + sk->sk_socket->state = SS_CONNECTED; + vsock_insert_connected(vsk); + sk->sk_state_change(sk); + + break; + case VMCI_TRANSPORT_PACKET_TYPE_NEGOTIATE: + case VMCI_TRANSPORT_PACKET_TYPE_NEGOTIATE2: + if (pkt->u.size == 0 + || pkt->dg.src.context != vsk->remote_addr.svm_cid + || pkt->src_port != vsk->remote_addr.svm_port + || !vmci_handle_is_invalid(vmci_trans(vsk)->qp_handle) + || vmci_trans(vsk)->qpair + || vmci_trans(vsk)->produce_size != 0 + || vmci_trans(vsk)->consume_size != 0 + || vmci_trans(vsk)->attach_sub_id != VMCI_INVALID_ID + || vmci_trans(vsk)->detach_sub_id != VMCI_INVALID_ID) { + skerr = EPROTO; + err = -EINVAL; + + goto destroy; + } + + err = vmci_transport_recv_connecting_client_negotiate(sk, pkt); + if (err) { + skerr = -err; + goto destroy; + } + + break; + case VMCI_TRANSPORT_PACKET_TYPE_INVALID: + err = vmci_transport_recv_connecting_client_invalid(sk, pkt); + if (err) { + skerr = -err; + goto destroy; + } + + break; + case VMCI_TRANSPORT_PACKET_TYPE_RST: + /* Older versions of the linux code (WS 6.5 / ESX 4.0) used to + * continue processing here after they sent an INVALID packet. + * This meant that we got a RST after the INVALID. We ignore a + * RST after an INVALID. The common code doesn't send the RST + * ... so we can hang if an old version of the common code + * fails between getting a REQUEST and sending an OFFER back. + * Not much we can do about it... except hope that it doesn't + * happen. + */ + if (vsk->ignore_connecting_rst) { + vsk->ignore_connecting_rst = false; + } else { + skerr = ECONNRESET; + err = 0; + goto destroy; + } + + break; + default: + /* Close and cleanup the connection. */ + skerr = EPROTO; + err = -EINVAL; + goto destroy; + } + + return 0; + +destroy: + vmci_transport_send_reset(sk, pkt); + + sk->sk_state = SS_UNCONNECTED; + sk->sk_err = skerr; + sk->sk_error_report(sk); + return err; +} + +static int vmci_transport_recv_connecting_client_negotiate( + struct sock *sk, + struct vmci_transport_packet *pkt) +{ + int err; + struct vsock_sock *vsk; + struct vmci_handle handle; + struct vmci_qp *qpair; + u32 attach_sub_id; + u32 detach_sub_id; + bool is_local; + u32 flags; + bool old_proto = true; + bool old_pkt_proto; + u16 version; + + vsk = vsock_sk(sk); + handle = VMCI_INVALID_HANDLE; + attach_sub_id = VMCI_INVALID_ID; + detach_sub_id = VMCI_INVALID_ID; + + /* If we have gotten here then we should be past the point where old + * linux vsock could have sent the bogus rst. + */ + vsk->sent_request = false; + vsk->ignore_connecting_rst = false; + + /* Verify that we're OK with the proposed queue pair size */ + if (pkt->u.size < vmci_trans(vsk)->queue_pair_min_size || + pkt->u.size > vmci_trans(vsk)->queue_pair_max_size) { + err = -EINVAL; + goto destroy; + } + + /* At this point we know the CID the peer is using to talk to us. */ + + if (vsk->local_addr.svm_cid == VMADDR_CID_ANY) + vsk->local_addr.svm_cid = pkt->dg.dst.context; + + /* Setup the notify ops to be the highest supported version that both + * the server and the client support. + */ + + if (vmci_transport_old_proto_override(&old_pkt_proto)) { + old_proto = old_pkt_proto; + } else { + if (pkt->type == VMCI_TRANSPORT_PACKET_TYPE_NEGOTIATE) + old_proto = true; + else if (pkt->type == VMCI_TRANSPORT_PACKET_TYPE_NEGOTIATE2) + old_proto = false; + + } + + if (old_proto) + version = VSOCK_PROTO_INVALID; + else + version = pkt->proto; + + if (!vmci_transport_proto_to_notify_struct(sk, &version, old_proto)) { + err = -EINVAL; + goto destroy; + } + + /* Subscribe to attach and detach events first. + * + * XXX We attach once for each queue pair created for now so it is easy + * to find the socket (it's provided), but later we should only + * subscribe once and add a way to lookup sockets by queue pair handle. + */ + err = vmci_event_subscribe(VMCI_EVENT_QP_PEER_ATTACH, + vmci_transport_peer_attach_cb, + sk, &attach_sub_id); + if (err < VMCI_SUCCESS) { + err = vmci_transport_error_to_vsock_error(err); + goto destroy; + } + + err = vmci_event_subscribe(VMCI_EVENT_QP_PEER_DETACH, + vmci_transport_peer_detach_cb, + sk, &detach_sub_id); + if (err < VMCI_SUCCESS) { + err = vmci_transport_error_to_vsock_error(err); + goto destroy; + } + + /* Make VMCI select the handle for us. */ + handle = VMCI_INVALID_HANDLE; + is_local = vsk->remote_addr.svm_cid == vsk->local_addr.svm_cid; + flags = is_local ? VMCI_QPFLAG_LOCAL : 0; + + err = vmci_transport_queue_pair_alloc(&qpair, + &handle, + pkt->u.size, + pkt->u.size, + vsk->remote_addr.svm_cid, + flags, + vmci_transport_is_trusted( + vsk, + vsk-> + remote_addr.svm_cid)); + if (err < 0) + goto destroy; + + err = vmci_transport_send_qp_offer(sk, handle); + if (err < 0) { + err = vmci_transport_error_to_vsock_error(err); + goto destroy; + } + + vmci_trans(vsk)->qp_handle = handle; + vmci_trans(vsk)->qpair = qpair; + + vmci_trans(vsk)->produce_size = vmci_trans(vsk)->consume_size = + pkt->u.size; + + vmci_trans(vsk)->attach_sub_id = attach_sub_id; + vmci_trans(vsk)->detach_sub_id = detach_sub_id; + + vmci_trans(vsk)->notify_ops->process_negotiate(sk); + + return 0; + +destroy: + if (attach_sub_id != VMCI_INVALID_ID) + vmci_event_unsubscribe(attach_sub_id); + + if (detach_sub_id != VMCI_INVALID_ID) + vmci_event_unsubscribe(detach_sub_id); + + if (!vmci_handle_is_invalid(handle)) + vmci_qpair_detach(&qpair); + + return err; +} + +static int +vmci_transport_recv_connecting_client_invalid(struct sock *sk, + struct vmci_transport_packet *pkt) +{ + int err = 0; + struct vsock_sock *vsk = vsock_sk(sk); + + if (vsk->sent_request) { + vsk->sent_request = false; + vsk->ignore_connecting_rst = true; + + err = vmci_transport_send_conn_request( + sk, vmci_trans(vsk)->queue_pair_size); + if (err < 0) + err = vmci_transport_error_to_vsock_error(err); + else + err = 0; + + } + + return err; +} + +static int vmci_transport_recv_connected(struct sock *sk, + struct vmci_transport_packet *pkt) +{ + struct vsock_sock *vsk; + bool pkt_processed = false; + + /* In cases where we are closing the connection, it's sufficient to + * mark the state change (and maybe error) and wake up any waiting + * threads. Since this is a connected socket, it's owned by a user + * process and will be cleaned up when the failure is passed back on + * the current or next system call. Our system call implementations + * must therefore check for error and state changes on entry and when + * being awoken. + */ + switch (pkt->type) { + case VMCI_TRANSPORT_PACKET_TYPE_SHUTDOWN: + if (pkt->u.mode) { + vsk = vsock_sk(sk); + + vsk->peer_shutdown |= pkt->u.mode; + sk->sk_state_change(sk); + } + break; + + case VMCI_TRANSPORT_PACKET_TYPE_RST: + vsk = vsock_sk(sk); + /* It is possible that we sent our peer a message (e.g a + * WAITING_READ) right before we got notified that the peer had + * detached. If that happens then we can get a RST pkt back + * from our peer even though there is data available for us to + * read. In that case, don't shutdown the socket completely but + * instead allow the local client to finish reading data off + * the queuepair. Always treat a RST pkt in connected mode like + * a clean shutdown. + */ + sock_set_flag(sk, SOCK_DONE); + vsk->peer_shutdown = SHUTDOWN_MASK; + if (vsock_stream_has_data(vsk) <= 0) + sk->sk_state = SS_DISCONNECTING; + + sk->sk_state_change(sk); + break; + + default: + vsk = vsock_sk(sk); + vmci_trans(vsk)->notify_ops->handle_notify_pkt( + sk, pkt, false, NULL, NULL, + &pkt_processed); + if (!pkt_processed) + return -EINVAL; + + break; + } + + return 0; +} + +static int vmci_transport_socket_init(struct vsock_sock *vsk, + struct vsock_sock *psk) +{ + vsk->trans = kmalloc(sizeof(struct vmci_transport), GFP_KERNEL); + if (!vsk->trans) + return -ENOMEM; + + vmci_trans(vsk)->dg_handle = VMCI_INVALID_HANDLE; + vmci_trans(vsk)->qp_handle = VMCI_INVALID_HANDLE; + vmci_trans(vsk)->qpair = NULL; + vmci_trans(vsk)->produce_size = vmci_trans(vsk)->consume_size = 0; + vmci_trans(vsk)->attach_sub_id = vmci_trans(vsk)->detach_sub_id = + VMCI_INVALID_ID; + vmci_trans(vsk)->notify_ops = NULL; + if (psk) { + vmci_trans(vsk)->queue_pair_size = + vmci_trans(psk)->queue_pair_size; + vmci_trans(vsk)->queue_pair_min_size = + vmci_trans(psk)->queue_pair_min_size; + vmci_trans(vsk)->queue_pair_max_size = + vmci_trans(psk)->queue_pair_max_size; + } else { + vmci_trans(vsk)->queue_pair_size = + VMCI_TRANSPORT_DEFAULT_QP_SIZE; + vmci_trans(vsk)->queue_pair_min_size = + VMCI_TRANSPORT_DEFAULT_QP_SIZE_MIN; + vmci_trans(vsk)->queue_pair_max_size = + VMCI_TRANSPORT_DEFAULT_QP_SIZE_MAX; + } + + return 0; +} + +static void vmci_transport_destruct(struct vsock_sock *vsk) +{ + if (vmci_trans(vsk)->attach_sub_id != VMCI_INVALID_ID) { + vmci_event_unsubscribe(vmci_trans(vsk)->attach_sub_id); + vmci_trans(vsk)->attach_sub_id = VMCI_INVALID_ID; + } + + if (vmci_trans(vsk)->detach_sub_id != VMCI_INVALID_ID) { + vmci_event_unsubscribe(vmci_trans(vsk)->detach_sub_id); + vmci_trans(vsk)->detach_sub_id = VMCI_INVALID_ID; + } + + if (!vmci_handle_is_invalid(vmci_trans(vsk)->qp_handle)) { + vmci_qpair_detach(&vmci_trans(vsk)->qpair); + vmci_trans(vsk)->qp_handle = VMCI_INVALID_HANDLE; + vmci_trans(vsk)->produce_size = 0; + vmci_trans(vsk)->consume_size = 0; + } + + if (vmci_trans(vsk)->notify_ops) + vmci_trans(vsk)->notify_ops->socket_destruct(vsk); + + kfree(vsk->trans); + vsk->trans = NULL; +} + +static void vmci_transport_release(struct vsock_sock *vsk) +{ + if (!vmci_handle_is_invalid(vmci_trans(vsk)->dg_handle)) { + vmci_datagram_destroy_handle(vmci_trans(vsk)->dg_handle); + vmci_trans(vsk)->dg_handle = VMCI_INVALID_HANDLE; + } +} + +static int vmci_transport_dgram_bind(struct vsock_sock *vsk, + struct sockaddr_vm *addr) +{ + u32 port; + u32 flags; + int err; + + /* VMCI will select a resource ID for us if we provide + * VMCI_INVALID_ID. + */ + port = addr->svm_port == VMADDR_PORT_ANY ? + VMCI_INVALID_ID : addr->svm_port; + + if (port <= LAST_RESERVED_PORT && !capable(CAP_NET_BIND_SERVICE)) + return -EACCES; + + flags = addr->svm_cid == VMADDR_CID_ANY ? + VMCI_FLAG_ANYCID_DG_HND : 0; + + err = vmci_transport_datagram_create_hnd(port, flags, + vmci_transport_recv_dgram_cb, + &vsk->sk, + &vmci_trans(vsk)->dg_handle); + if (err < VMCI_SUCCESS) + return vmci_transport_error_to_vsock_error(err); + vsock_addr_init(&vsk->local_addr, addr->svm_cid, + vmci_trans(vsk)->dg_handle.resource); + + return 0; +} + +static int vmci_transport_dgram_enqueue( + struct vsock_sock *vsk, + struct sockaddr_vm *remote_addr, + struct iovec *iov, + size_t len) +{ + int err; + struct vmci_datagram *dg; + + if (len > VMCI_MAX_DG_PAYLOAD_SIZE) + return -EMSGSIZE; + + if (!vmci_transport_allow_dgram(vsk, remote_addr->svm_cid)) + return -EPERM; + + /* Allocate a buffer for the user's message and our packet header. */ + dg = kmalloc(len + sizeof(*dg), GFP_KERNEL); + if (!dg) + return -ENOMEM; + + memcpy_fromiovec(VMCI_DG_PAYLOAD(dg), iov, len); + + dg->dst = vmci_make_handle(remote_addr->svm_cid, + remote_addr->svm_port); + dg->src = vmci_make_handle(vsk->local_addr.svm_cid, + vsk->local_addr.svm_port); + dg->payload_size = len; + + err = vmci_datagram_send(dg); + kfree(dg); + if (err < 0) + return vmci_transport_error_to_vsock_error(err); + + return err - sizeof(*dg); +} + +static int vmci_transport_dgram_dequeue(struct kiocb *kiocb, + struct vsock_sock *vsk, + struct msghdr *msg, size_t len, + int flags) +{ + int err; + int noblock; + struct vmci_datagram *dg; + size_t payload_len; + struct sk_buff *skb; + + noblock = flags & MSG_DONTWAIT; + + if (flags & MSG_OOB || flags & MSG_ERRQUEUE) + return -EOPNOTSUPP; + + /* Retrieve the head sk_buff from the socket's receive queue. */ + err = 0; + skb = skb_recv_datagram(&vsk->sk, flags, noblock, &err); + if (err) + return err; + + if (!skb) + return -EAGAIN; + + dg = (struct vmci_datagram *)skb->data; + if (!dg) + /* err is 0, meaning we read zero bytes. */ + goto out; + + payload_len = dg->payload_size; + /* Ensure the sk_buff matches the payload size claimed in the packet. */ + if (payload_len != skb->len - sizeof(*dg)) { + err = -EINVAL; + goto out; + } + + if (payload_len > len) { + payload_len = len; + msg->msg_flags |= MSG_TRUNC; + } + + /* Place the datagram payload in the user's iovec. */ + err = skb_copy_datagram_iovec(skb, sizeof(*dg), msg->msg_iov, + payload_len); + if (err) + goto out; + + msg->msg_namelen = 0; + if (msg->msg_name) { + struct sockaddr_vm *vm_addr; + + /* Provide the address of the sender. */ + vm_addr = (struct sockaddr_vm *)msg->msg_name; + vsock_addr_init(vm_addr, dg->src.context, dg->src.resource); + msg->msg_namelen = sizeof(*vm_addr); + } + err = payload_len; + +out: + skb_free_datagram(&vsk->sk, skb); + return err; +} + +static bool vmci_transport_dgram_allow(u32 cid, u32 port) +{ + if (cid == VMADDR_CID_HYPERVISOR) { + /* Registrations of PBRPC Servers do not modify VMX/Hypervisor + * state and are allowed. + */ + return port == VMCI_UNITY_PBRPC_REGISTER; + } + + return true; +} + +static int vmci_transport_connect(struct vsock_sock *vsk) +{ + int err; + bool old_pkt_proto = false; + struct sock *sk = &vsk->sk; + + if (vmci_transport_old_proto_override(&old_pkt_proto) && + old_pkt_proto) { + err = vmci_transport_send_conn_request( + sk, vmci_trans(vsk)->queue_pair_size); + if (err < 0) { + sk->sk_state = SS_UNCONNECTED; + return err; + } + } else { + int supported_proto_versions = + vmci_transport_new_proto_supported_versions(); + err = vmci_transport_send_conn_request2( + sk, vmci_trans(vsk)->queue_pair_size, + supported_proto_versions); + if (err < 0) { + sk->sk_state = SS_UNCONNECTED; + return err; + } + + vsk->sent_request = true; + } + + return err; +} + +static ssize_t vmci_transport_stream_dequeue( + struct vsock_sock *vsk, + struct iovec *iov, + size_t len, + int flags) +{ + if (flags & MSG_PEEK) + return vmci_qpair_peekv(vmci_trans(vsk)->qpair, iov, len, 0); + else + return vmci_qpair_dequev(vmci_trans(vsk)->qpair, iov, len, 0); +} + +static ssize_t vmci_transport_stream_enqueue( + struct vsock_sock *vsk, + struct iovec *iov, + size_t len) +{ + return vmci_qpair_enquev(vmci_trans(vsk)->qpair, iov, len, 0); +} + +static s64 vmci_transport_stream_has_data(struct vsock_sock *vsk) +{ + return vmci_qpair_consume_buf_ready(vmci_trans(vsk)->qpair); +} + +static s64 vmci_transport_stream_has_space(struct vsock_sock *vsk) +{ + return vmci_qpair_produce_free_space(vmci_trans(vsk)->qpair); +} + +static u64 vmci_transport_stream_rcvhiwat(struct vsock_sock *vsk) +{ + return vmci_trans(vsk)->consume_size; +} + +static bool vmci_transport_stream_is_active(struct vsock_sock *vsk) +{ + return !vmci_handle_is_invalid(vmci_trans(vsk)->qp_handle); +} + +static u64 vmci_transport_get_buffer_size(struct vsock_sock *vsk) +{ + return vmci_trans(vsk)->queue_pair_size; +} + +static u64 vmci_transport_get_min_buffer_size(struct vsock_sock *vsk) +{ + return vmci_trans(vsk)->queue_pair_min_size; +} + +static u64 vmci_transport_get_max_buffer_size(struct vsock_sock *vsk) +{ + return vmci_trans(vsk)->queue_pair_max_size; +} + +static void vmci_transport_set_buffer_size(struct vsock_sock *vsk, u64 val) +{ + if (val < vmci_trans(vsk)->queue_pair_min_size) + vmci_trans(vsk)->queue_pair_min_size = val; + if (val > vmci_trans(vsk)->queue_pair_max_size) + vmci_trans(vsk)->queue_pair_max_size = val; + vmci_trans(vsk)->queue_pair_size = val; +} + +static void vmci_transport_set_min_buffer_size(struct vsock_sock *vsk, + u64 val) +{ + if (val > vmci_trans(vsk)->queue_pair_size) + vmci_trans(vsk)->queue_pair_size = val; + vmci_trans(vsk)->queue_pair_min_size = val; +} + +static void vmci_transport_set_max_buffer_size(struct vsock_sock *vsk, + u64 val) +{ + if (val < vmci_trans(vsk)->queue_pair_size) + vmci_trans(vsk)->queue_pair_size = val; + vmci_trans(vsk)->queue_pair_max_size = val; +} + +static int vmci_transport_notify_poll_in( + struct vsock_sock *vsk, + size_t target, + bool *data_ready_now) +{ + return vmci_trans(vsk)->notify_ops->poll_in( + &vsk->sk, target, data_ready_now); +} + +static int vmci_transport_notify_poll_out( + struct vsock_sock *vsk, + size_t target, + bool *space_available_now) +{ + return vmci_trans(vsk)->notify_ops->poll_out( + &vsk->sk, target, space_available_now); +} + +static int vmci_transport_notify_recv_init( + struct vsock_sock *vsk, + size_t target, + struct vsock_transport_recv_notify_data *data) +{ + return vmci_trans(vsk)->notify_ops->recv_init( + &vsk->sk, target, + (struct vmci_transport_recv_notify_data *)data); +} + +static int vmci_transport_notify_recv_pre_block( + struct vsock_sock *vsk, + size_t target, + struct vsock_transport_recv_notify_data *data) +{ + return vmci_trans(vsk)->notify_ops->recv_pre_block( + &vsk->sk, target, + (struct vmci_transport_recv_notify_data *)data); +} + +static int vmci_transport_notify_recv_pre_dequeue( + struct vsock_sock *vsk, + size_t target, + struct vsock_transport_recv_notify_data *data) +{ + return vmci_trans(vsk)->notify_ops->recv_pre_dequeue( + &vsk->sk, target, + (struct vmci_transport_recv_notify_data *)data); +} + +static int vmci_transport_notify_recv_post_dequeue( + struct vsock_sock *vsk, + size_t target, + ssize_t copied, + bool data_read, + struct vsock_transport_recv_notify_data *data) +{ + return vmci_trans(vsk)->notify_ops->recv_post_dequeue( + &vsk->sk, target, copied, data_read, + (struct vmci_transport_recv_notify_data *)data); +} + +static int vmci_transport_notify_send_init( + struct vsock_sock *vsk, + struct vsock_transport_send_notify_data *data) +{ + return vmci_trans(vsk)->notify_ops->send_init( + &vsk->sk, + (struct vmci_transport_send_notify_data *)data); +} + +static int vmci_transport_notify_send_pre_block( + struct vsock_sock *vsk, + struct vsock_transport_send_notify_data *data) +{ + return vmci_trans(vsk)->notify_ops->send_pre_block( + &vsk->sk, + (struct vmci_transport_send_notify_data *)data); +} + +static int vmci_transport_notify_send_pre_enqueue( + struct vsock_sock *vsk, + struct vsock_transport_send_notify_data *data) +{ + return vmci_trans(vsk)->notify_ops->send_pre_enqueue( + &vsk->sk, + (struct vmci_transport_send_notify_data *)data); +} + +static int vmci_transport_notify_send_post_enqueue( + struct vsock_sock *vsk, + ssize_t written, + struct vsock_transport_send_notify_data *data) +{ + return vmci_trans(vsk)->notify_ops->send_post_enqueue( + &vsk->sk, written, + (struct vmci_transport_send_notify_data *)data); +} + +static bool vmci_transport_old_proto_override(bool *old_pkt_proto) +{ + if (PROTOCOL_OVERRIDE != -1) { + if (PROTOCOL_OVERRIDE == 0) + *old_pkt_proto = true; + else + *old_pkt_proto = false; + + pr_info("Proto override in use\n"); + return true; + } + + return false; +} + +static bool vmci_transport_proto_to_notify_struct(struct sock *sk, + u16 *proto, + bool old_pkt_proto) +{ + struct vsock_sock *vsk = vsock_sk(sk); + + if (old_pkt_proto) { + if (*proto != VSOCK_PROTO_INVALID) { + pr_err("Can't set both an old and new protocol\n"); + return false; + } + vmci_trans(vsk)->notify_ops = &vmci_transport_notify_pkt_ops; + goto exit; + } + + switch (*proto) { + case VSOCK_PROTO_PKT_ON_NOTIFY: + vmci_trans(vsk)->notify_ops = + &vmci_transport_notify_pkt_q_state_ops; + break; + default: + pr_err("Unknown notify protocol version\n"); + return false; + } + +exit: + vmci_trans(vsk)->notify_ops->socket_init(sk); + return true; +} + +static u16 vmci_transport_new_proto_supported_versions(void) +{ + if (PROTOCOL_OVERRIDE != -1) + return PROTOCOL_OVERRIDE; + + return VSOCK_PROTO_ALL_SUPPORTED; +} + +static u32 vmci_transport_get_local_cid(void) +{ + return vmci_get_context_id(); +} + +static struct vsock_transport vmci_transport = { + .init = vmci_transport_socket_init, + .destruct = vmci_transport_destruct, + .release = vmci_transport_release, + .connect = vmci_transport_connect, + .dgram_bind = vmci_transport_dgram_bind, + .dgram_dequeue = vmci_transport_dgram_dequeue, + .dgram_enqueue = vmci_transport_dgram_enqueue, + .dgram_allow = vmci_transport_dgram_allow, + .stream_dequeue = vmci_transport_stream_dequeue, + .stream_enqueue = vmci_transport_stream_enqueue, + .stream_has_data = vmci_transport_stream_has_data, + .stream_has_space = vmci_transport_stream_has_space, + .stream_rcvhiwat = vmci_transport_stream_rcvhiwat, + .stream_is_active = vmci_transport_stream_is_active, + .stream_allow = vmci_transport_stream_allow, + .notify_poll_in = vmci_transport_notify_poll_in, + .notify_poll_out = vmci_transport_notify_poll_out, + .notify_recv_init = vmci_transport_notify_recv_init, + .notify_recv_pre_block = vmci_transport_notify_recv_pre_block, + .notify_recv_pre_dequeue = vmci_transport_notify_recv_pre_dequeue, + .notify_recv_post_dequeue = vmci_transport_notify_recv_post_dequeue, + .notify_send_init = vmci_transport_notify_send_init, + .notify_send_pre_block = vmci_transport_notify_send_pre_block, + .notify_send_pre_enqueue = vmci_transport_notify_send_pre_enqueue, + .notify_send_post_enqueue = vmci_transport_notify_send_post_enqueue, + .shutdown = vmci_transport_shutdown, + .set_buffer_size = vmci_transport_set_buffer_size, + .set_min_buffer_size = vmci_transport_set_min_buffer_size, + .set_max_buffer_size = vmci_transport_set_max_buffer_size, + .get_buffer_size = vmci_transport_get_buffer_size, + .get_min_buffer_size = vmci_transport_get_min_buffer_size, + .get_max_buffer_size = vmci_transport_get_max_buffer_size, + .get_local_cid = vmci_transport_get_local_cid, +}; + +static int __init vmci_transport_init(void) +{ + int err; + + /* Create the datagram handle that we will use to send and receive all + * VSocket control messages for this context. + */ + err = vmci_transport_datagram_create_hnd(VMCI_TRANSPORT_PACKET_RID, + VMCI_FLAG_ANYCID_DG_HND, + vmci_transport_recv_stream_cb, + NULL, + &vmci_transport_stream_handle); + if (err < VMCI_SUCCESS) { + pr_err("Unable to create datagram handle. (%d)\n", err); + return vmci_transport_error_to_vsock_error(err); + } + + err = vmci_event_subscribe(VMCI_EVENT_QP_RESUMED, + vmci_transport_qp_resumed_cb, + NULL, &vmci_transport_qp_resumed_sub_id); + if (err < VMCI_SUCCESS) { + pr_err("Unable to subscribe to resumed event. (%d)\n", err); + err = vmci_transport_error_to_vsock_error(err); + vmci_transport_qp_resumed_sub_id = VMCI_INVALID_ID; + goto err_destroy_stream_handle; + } + + err = vsock_core_init(&vmci_transport); + if (err < 0) + goto err_unsubscribe; + + return 0; + +err_unsubscribe: + vmci_event_unsubscribe(vmci_transport_qp_resumed_sub_id); +err_destroy_stream_handle: + vmci_datagram_destroy_handle(vmci_transport_stream_handle); + return err; +} +module_init(vmci_transport_init); + +static void __exit vmci_transport_exit(void) +{ + if (!vmci_handle_is_invalid(vmci_transport_stream_handle)) { + if (vmci_datagram_destroy_handle( + vmci_transport_stream_handle) != VMCI_SUCCESS) + pr_err("Couldn't destroy datagram handle\n"); + vmci_transport_stream_handle = VMCI_INVALID_HANDLE; + } + + if (vmci_transport_qp_resumed_sub_id != VMCI_INVALID_ID) { + vmci_event_unsubscribe(vmci_transport_qp_resumed_sub_id); + vmci_transport_qp_resumed_sub_id = VMCI_INVALID_ID; + } + + vsock_core_exit(); +} +module_exit(vmci_transport_exit); + +MODULE_AUTHOR("VMware, Inc."); +MODULE_DESCRIPTION("VMCI transport for Virtual Sockets"); +MODULE_LICENSE("GPL v2"); +MODULE_ALIAS("vmware_vsock"); +MODULE_ALIAS_NETPROTO(PF_VSOCK); diff --git a/net/vmw_vsock/vmci_transport.h b/net/vmw_vsock/vmci_transport.h new file mode 100644 index 000000000000..1bf991803ec0 --- /dev/null +++ b/net/vmw_vsock/vmci_transport.h @@ -0,0 +1,139 @@ +/* + * VMware vSockets Driver + * + * Copyright (C) 2013 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#ifndef _VMCI_TRANSPORT_H_ +#define _VMCI_TRANSPORT_H_ + +#include +#include + +#include "vsock_addr.h" +#include "af_vsock.h" + +/* If the packet format changes in a release then this should change too. */ +#define VMCI_TRANSPORT_PACKET_VERSION 1 + +/* The resource ID on which control packets are sent. */ +#define VMCI_TRANSPORT_PACKET_RID 1 + +#define VSOCK_PROTO_INVALID 0 +#define VSOCK_PROTO_PKT_ON_NOTIFY (1 << 0) +#define VSOCK_PROTO_ALL_SUPPORTED (VSOCK_PROTO_PKT_ON_NOTIFY) + +#define vmci_trans(_vsk) ((struct vmci_transport *)((_vsk)->trans)) + +enum vmci_transport_packet_type { + VMCI_TRANSPORT_PACKET_TYPE_INVALID = 0, + VMCI_TRANSPORT_PACKET_TYPE_REQUEST, + VMCI_TRANSPORT_PACKET_TYPE_NEGOTIATE, + VMCI_TRANSPORT_PACKET_TYPE_OFFER, + VMCI_TRANSPORT_PACKET_TYPE_ATTACH, + VMCI_TRANSPORT_PACKET_TYPE_WROTE, + VMCI_TRANSPORT_PACKET_TYPE_READ, + VMCI_TRANSPORT_PACKET_TYPE_RST, + VMCI_TRANSPORT_PACKET_TYPE_SHUTDOWN, + VMCI_TRANSPORT_PACKET_TYPE_WAITING_WRITE, + VMCI_TRANSPORT_PACKET_TYPE_WAITING_READ, + VMCI_TRANSPORT_PACKET_TYPE_REQUEST2, + VMCI_TRANSPORT_PACKET_TYPE_NEGOTIATE2, + VMCI_TRANSPORT_PACKET_TYPE_MAX +}; + +struct vmci_transport_waiting_info { + u64 generation; + u64 offset; +}; + +/* Control packet type for STREAM sockets. DGRAMs have no control packets nor + * special packet header for data packets, they are just raw VMCI DGRAM + * messages. For STREAMs, control packets are sent over the control channel + * while data is written and read directly from queue pairs with no packet + * format. + */ +struct vmci_transport_packet { + struct vmci_datagram dg; + u8 version; + u8 type; + u16 proto; + u32 src_port; + u32 dst_port; + u32 _reserved2; + union { + u64 size; + u64 mode; + struct vmci_handle handle; + struct vmci_transport_waiting_info wait; + } u; +}; + +struct vmci_transport_notify_pkt { + u64 write_notify_window; + u64 write_notify_min_window; + bool peer_waiting_read; + bool peer_waiting_write; + bool peer_waiting_write_detected; + bool sent_waiting_read; + bool sent_waiting_write; + struct vmci_transport_waiting_info peer_waiting_read_info; + struct vmci_transport_waiting_info peer_waiting_write_info; + u64 produce_q_generation; + u64 consume_q_generation; +}; + +struct vmci_transport_notify_pkt_q_state { + u64 write_notify_window; + u64 write_notify_min_window; + bool peer_waiting_write; + bool peer_waiting_write_detected; +}; + +union vmci_transport_notify { + struct vmci_transport_notify_pkt pkt; + struct vmci_transport_notify_pkt_q_state pkt_q_state; +}; + +/* Our transport-specific data. */ +struct vmci_transport { + /* For DGRAMs. */ + struct vmci_handle dg_handle; + /* For STREAMs. */ + struct vmci_handle qp_handle; + struct vmci_qp *qpair; + u64 produce_size; + u64 consume_size; + u64 queue_pair_size; + u64 queue_pair_min_size; + u64 queue_pair_max_size; + u32 attach_sub_id; + u32 detach_sub_id; + union vmci_transport_notify notify; + struct vmci_transport_notify_ops *notify_ops; +}; + +int vmci_transport_register(void); +void vmci_transport_unregister(void); + +int vmci_transport_send_wrote_bh(struct sockaddr_vm *dst, + struct sockaddr_vm *src); +int vmci_transport_send_read_bh(struct sockaddr_vm *dst, + struct sockaddr_vm *src); +int vmci_transport_send_wrote(struct sock *sk); +int vmci_transport_send_read(struct sock *sk); +int vmci_transport_send_waiting_write(struct sock *sk, + struct vmci_transport_waiting_info *wait); +int vmci_transport_send_waiting_read(struct sock *sk, + struct vmci_transport_waiting_info *wait); + +#endif diff --git a/net/vmw_vsock/vmci_transport_notify.c b/net/vmw_vsock/vmci_transport_notify.c new file mode 100644 index 000000000000..9a730744e7bc --- /dev/null +++ b/net/vmw_vsock/vmci_transport_notify.c @@ -0,0 +1,680 @@ +/* + * VMware vSockets Driver + * + * Copyright (C) 2009-2013 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#include +#include +#include +#include + +#include "vmci_transport_notify.h" + +#define PKT_FIELD(vsk, field_name) (vmci_trans(vsk)->notify.pkt.field_name) + +static bool vmci_transport_notify_waiting_write(struct vsock_sock *vsk) +{ +#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY) + bool retval; + u64 notify_limit; + + if (!PKT_FIELD(vsk, peer_waiting_write)) + return false; + +#ifdef VSOCK_OPTIMIZATION_FLOW_CONTROL + /* When the sender blocks, we take that as a sign that the sender is + * faster than the receiver. To reduce the transmit rate of the sender, + * we delay the sending of the read notification by decreasing the + * write_notify_window. The notification is delayed until the number of + * bytes used in the queue drops below the write_notify_window. + */ + + if (!PKT_FIELD(vsk, peer_waiting_write_detected)) { + PKT_FIELD(vsk, peer_waiting_write_detected) = true; + if (PKT_FIELD(vsk, write_notify_window) < PAGE_SIZE) { + PKT_FIELD(vsk, write_notify_window) = + PKT_FIELD(vsk, write_notify_min_window); + } else { + PKT_FIELD(vsk, write_notify_window) -= PAGE_SIZE; + if (PKT_FIELD(vsk, write_notify_window) < + PKT_FIELD(vsk, write_notify_min_window)) + PKT_FIELD(vsk, write_notify_window) = + PKT_FIELD(vsk, write_notify_min_window); + + } + } + notify_limit = vmci_trans(vsk)->consume_size - + PKT_FIELD(vsk, write_notify_window); +#else + notify_limit = 0; +#endif + + /* For now we ignore the wait information and just see if the free + * space exceeds the notify limit. Note that improving this function + * to be more intelligent will not require a protocol change and will + * retain compatibility between endpoints with mixed versions of this + * function. + * + * The notify_limit is used to delay notifications in the case where + * flow control is enabled. Below the test is expressed in terms of + * free space in the queue: if free_space > ConsumeSize - + * write_notify_window then notify An alternate way of expressing this + * is to rewrite the expression to use the data ready in the receive + * queue: if write_notify_window > bufferReady then notify as + * free_space == ConsumeSize - bufferReady. + */ + retval = vmci_qpair_consume_free_space(vmci_trans(vsk)->qpair) > + notify_limit; +#ifdef VSOCK_OPTIMIZATION_FLOW_CONTROL + if (retval) { + /* + * Once we notify the peer, we reset the detected flag so the + * next wait will again cause a decrease in the window size. + */ + + PKT_FIELD(vsk, peer_waiting_write_detected) = false; + } +#endif + return retval; +#else + return true; +#endif +} + +static bool vmci_transport_notify_waiting_read(struct vsock_sock *vsk) +{ +#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY) + if (!PKT_FIELD(vsk, peer_waiting_read)) + return false; + + /* For now we ignore the wait information and just see if there is any + * data for our peer to read. Note that improving this function to be + * more intelligent will not require a protocol change and will retain + * compatibility between endpoints with mixed versions of this + * function. + */ + return vmci_qpair_produce_buf_ready(vmci_trans(vsk)->qpair) > 0; +#else + return true; +#endif +} + +static void +vmci_transport_handle_waiting_read(struct sock *sk, + struct vmci_transport_packet *pkt, + bool bottom_half, + struct sockaddr_vm *dst, + struct sockaddr_vm *src) +{ +#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY) + struct vsock_sock *vsk; + + vsk = vsock_sk(sk); + + PKT_FIELD(vsk, peer_waiting_read) = true; + memcpy(&PKT_FIELD(vsk, peer_waiting_read_info), &pkt->u.wait, + sizeof(PKT_FIELD(vsk, peer_waiting_read_info))); + + if (vmci_transport_notify_waiting_read(vsk)) { + bool sent; + + if (bottom_half) + sent = vmci_transport_send_wrote_bh(dst, src) > 0; + else + sent = vmci_transport_send_wrote(sk) > 0; + + if (sent) + PKT_FIELD(vsk, peer_waiting_read) = false; + } +#endif +} + +static void +vmci_transport_handle_waiting_write(struct sock *sk, + struct vmci_transport_packet *pkt, + bool bottom_half, + struct sockaddr_vm *dst, + struct sockaddr_vm *src) +{ +#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY) + struct vsock_sock *vsk; + + vsk = vsock_sk(sk); + + PKT_FIELD(vsk, peer_waiting_write) = true; + memcpy(&PKT_FIELD(vsk, peer_waiting_write_info), &pkt->u.wait, + sizeof(PKT_FIELD(vsk, peer_waiting_write_info))); + + if (vmci_transport_notify_waiting_write(vsk)) { + bool sent; + + if (bottom_half) + sent = vmci_transport_send_read_bh(dst, src) > 0; + else + sent = vmci_transport_send_read(sk) > 0; + + if (sent) + PKT_FIELD(vsk, peer_waiting_write) = false; + } +#endif +} + +static void +vmci_transport_handle_read(struct sock *sk, + struct vmci_transport_packet *pkt, + bool bottom_half, + struct sockaddr_vm *dst, struct sockaddr_vm *src) +{ +#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY) + struct vsock_sock *vsk; + + vsk = vsock_sk(sk); + PKT_FIELD(vsk, sent_waiting_write) = false; +#endif + + sk->sk_write_space(sk); +} + +static bool send_waiting_read(struct sock *sk, u64 room_needed) +{ +#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY) + struct vsock_sock *vsk; + struct vmci_transport_waiting_info waiting_info; + u64 tail; + u64 head; + u64 room_left; + bool ret; + + vsk = vsock_sk(sk); + + if (PKT_FIELD(vsk, sent_waiting_read)) + return true; + + if (PKT_FIELD(vsk, write_notify_window) < + vmci_trans(vsk)->consume_size) + PKT_FIELD(vsk, write_notify_window) = + min(PKT_FIELD(vsk, write_notify_window) + PAGE_SIZE, + vmci_trans(vsk)->consume_size); + + vmci_qpair_get_consume_indexes(vmci_trans(vsk)->qpair, &tail, &head); + room_left = vmci_trans(vsk)->consume_size - head; + if (room_needed >= room_left) { + waiting_info.offset = room_needed - room_left; + waiting_info.generation = + PKT_FIELD(vsk, consume_q_generation) + 1; + } else { + waiting_info.offset = head + room_needed; + waiting_info.generation = PKT_FIELD(vsk, consume_q_generation); + } + + ret = vmci_transport_send_waiting_read(sk, &waiting_info) > 0; + if (ret) + PKT_FIELD(vsk, sent_waiting_read) = true; + + return ret; +#else + return true; +#endif +} + +static bool send_waiting_write(struct sock *sk, u64 room_needed) +{ +#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY) + struct vsock_sock *vsk; + struct vmci_transport_waiting_info waiting_info; + u64 tail; + u64 head; + u64 room_left; + bool ret; + + vsk = vsock_sk(sk); + + if (PKT_FIELD(vsk, sent_waiting_write)) + return true; + + vmci_qpair_get_produce_indexes(vmci_trans(vsk)->qpair, &tail, &head); + room_left = vmci_trans(vsk)->produce_size - tail; + if (room_needed + 1 >= room_left) { + /* Wraps around to current generation. */ + waiting_info.offset = room_needed + 1 - room_left; + waiting_info.generation = PKT_FIELD(vsk, produce_q_generation); + } else { + waiting_info.offset = tail + room_needed + 1; + waiting_info.generation = + PKT_FIELD(vsk, produce_q_generation) - 1; + } + + ret = vmci_transport_send_waiting_write(sk, &waiting_info) > 0; + if (ret) + PKT_FIELD(vsk, sent_waiting_write) = true; + + return ret; +#else + return true; +#endif +} + +static int vmci_transport_send_read_notification(struct sock *sk) +{ + struct vsock_sock *vsk; + bool sent_read; + unsigned int retries; + int err; + + vsk = vsock_sk(sk); + sent_read = false; + retries = 0; + err = 0; + + if (vmci_transport_notify_waiting_write(vsk)) { + /* Notify the peer that we have read, retrying the send on + * failure up to our maximum value. XXX For now we just log + * the failure, but later we should schedule a work item to + * handle the resend until it succeeds. That would require + * keeping track of work items in the vsk and cleaning them up + * upon socket close. + */ + while (!(vsk->peer_shutdown & RCV_SHUTDOWN) && + !sent_read && + retries < VMCI_TRANSPORT_MAX_DGRAM_RESENDS) { + err = vmci_transport_send_read(sk); + if (err >= 0) + sent_read = true; + + retries++; + } + + if (retries >= VMCI_TRANSPORT_MAX_DGRAM_RESENDS) + pr_err("%p unable to send read notify to peer\n", sk); + else +#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY) + PKT_FIELD(vsk, peer_waiting_write) = false; +#endif + + } + return err; +} + +static void +vmci_transport_handle_wrote(struct sock *sk, + struct vmci_transport_packet *pkt, + bool bottom_half, + struct sockaddr_vm *dst, struct sockaddr_vm *src) +{ +#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY) + struct vsock_sock *vsk = vsock_sk(sk); + PKT_FIELD(vsk, sent_waiting_read) = false; +#endif + sk->sk_data_ready(sk, 0); +} + +static void vmci_transport_notify_pkt_socket_init(struct sock *sk) +{ + struct vsock_sock *vsk = vsock_sk(sk); + + PKT_FIELD(vsk, write_notify_window) = PAGE_SIZE; + PKT_FIELD(vsk, write_notify_min_window) = PAGE_SIZE; + PKT_FIELD(vsk, peer_waiting_read) = false; + PKT_FIELD(vsk, peer_waiting_write) = false; + PKT_FIELD(vsk, peer_waiting_write_detected) = false; + PKT_FIELD(vsk, sent_waiting_read) = false; + PKT_FIELD(vsk, sent_waiting_write) = false; + PKT_FIELD(vsk, produce_q_generation) = 0; + PKT_FIELD(vsk, consume_q_generation) = 0; + + memset(&PKT_FIELD(vsk, peer_waiting_read_info), 0, + sizeof(PKT_FIELD(vsk, peer_waiting_read_info))); + memset(&PKT_FIELD(vsk, peer_waiting_write_info), 0, + sizeof(PKT_FIELD(vsk, peer_waiting_write_info))); +} + +static void vmci_transport_notify_pkt_socket_destruct(struct vsock_sock *vsk) +{ +} + +static int +vmci_transport_notify_pkt_poll_in(struct sock *sk, + size_t target, bool *data_ready_now) +{ + struct vsock_sock *vsk = vsock_sk(sk); + + if (vsock_stream_has_data(vsk)) { + *data_ready_now = true; + } else { + /* We can't read right now because there is nothing in the + * queue. Ask for notifications when there is something to + * read. + */ + if (sk->sk_state == SS_CONNECTED) { + if (!send_waiting_read(sk, 1)) + return -1; + + } + *data_ready_now = false; + } + + return 0; +} + +static int +vmci_transport_notify_pkt_poll_out(struct sock *sk, + size_t target, bool *space_avail_now) +{ + s64 produce_q_free_space; + struct vsock_sock *vsk = vsock_sk(sk); + + produce_q_free_space = vsock_stream_has_space(vsk); + if (produce_q_free_space > 0) { + *space_avail_now = true; + return 0; + } else if (produce_q_free_space == 0) { + /* This is a connected socket but we can't currently send data. + * Notify the peer that we are waiting if the queue is full. We + * only send a waiting write if the queue is full because + * otherwise we end up in an infinite WAITING_WRITE, READ, + * WAITING_WRITE, READ, etc. loop. Treat failing to send the + * notification as a socket error, passing that back through + * the mask. + */ + if (!send_waiting_write(sk, 1)) + return -1; + + *space_avail_now = false; + } + + return 0; +} + +static int +vmci_transport_notify_pkt_recv_init( + struct sock *sk, + size_t target, + struct vmci_transport_recv_notify_data *data) +{ + struct vsock_sock *vsk = vsock_sk(sk); + +#ifdef VSOCK_OPTIMIZATION_WAITING_NOTIFY + data->consume_head = 0; + data->produce_tail = 0; +#ifdef VSOCK_OPTIMIZATION_FLOW_CONTROL + data->notify_on_block = false; + + if (PKT_FIELD(vsk, write_notify_min_window) < target + 1) { + PKT_FIELD(vsk, write_notify_min_window) = target + 1; + if (PKT_FIELD(vsk, write_notify_window) < + PKT_FIELD(vsk, write_notify_min_window)) { + /* If the current window is smaller than the new + * minimal window size, we need to reevaluate whether + * we need to notify the sender. If the number of ready + * bytes are smaller than the new window, we need to + * send a notification to the sender before we block. + */ + + PKT_FIELD(vsk, write_notify_window) = + PKT_FIELD(vsk, write_notify_min_window); + data->notify_on_block = true; + } + } +#endif +#endif + + return 0; +} + +static int +vmci_transport_notify_pkt_recv_pre_block( + struct sock *sk, + size_t target, + struct vmci_transport_recv_notify_data *data) +{ + int err = 0; + + /* Notify our peer that we are waiting for data to read. */ + if (!send_waiting_read(sk, target)) { + err = -EHOSTUNREACH; + return err; + } +#ifdef VSOCK_OPTIMIZATION_FLOW_CONTROL + if (data->notify_on_block) { + err = vmci_transport_send_read_notification(sk); + if (err < 0) + return err; + + data->notify_on_block = false; + } +#endif + + return err; +} + +static int +vmci_transport_notify_pkt_recv_pre_dequeue( + struct sock *sk, + size_t target, + struct vmci_transport_recv_notify_data *data) +{ + struct vsock_sock *vsk = vsock_sk(sk); + + /* Now consume up to len bytes from the queue. Note that since we have + * the socket locked we should copy at least ready bytes. + */ +#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY) + vmci_qpair_get_consume_indexes(vmci_trans(vsk)->qpair, + &data->produce_tail, + &data->consume_head); +#endif + + return 0; +} + +static int +vmci_transport_notify_pkt_recv_post_dequeue( + struct sock *sk, + size_t target, + ssize_t copied, + bool data_read, + struct vmci_transport_recv_notify_data *data) +{ + struct vsock_sock *vsk; + int err; + + vsk = vsock_sk(sk); + err = 0; + + if (data_read) { +#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY) + /* Detect a wrap-around to maintain queue generation. Note + * that this is safe since we hold the socket lock across the + * two queue pair operations. + */ + if (copied >= + vmci_trans(vsk)->consume_size - data->consume_head) + PKT_FIELD(vsk, consume_q_generation)++; +#endif + + err = vmci_transport_send_read_notification(sk); + if (err < 0) + return err; + + } + return err; +} + +static int +vmci_transport_notify_pkt_send_init( + struct sock *sk, + struct vmci_transport_send_notify_data *data) +{ +#ifdef VSOCK_OPTIMIZATION_WAITING_NOTIFY + data->consume_head = 0; + data->produce_tail = 0; +#endif + + return 0; +} + +static int +vmci_transport_notify_pkt_send_pre_block( + struct sock *sk, + struct vmci_transport_send_notify_data *data) +{ + /* Notify our peer that we are waiting for room to write. */ + if (!send_waiting_write(sk, 1)) + return -EHOSTUNREACH; + + return 0; +} + +static int +vmci_transport_notify_pkt_send_pre_enqueue( + struct sock *sk, + struct vmci_transport_send_notify_data *data) +{ + struct vsock_sock *vsk = vsock_sk(sk); + +#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY) + vmci_qpair_get_produce_indexes(vmci_trans(vsk)->qpair, + &data->produce_tail, + &data->consume_head); +#endif + + return 0; +} + +static int +vmci_transport_notify_pkt_send_post_enqueue( + struct sock *sk, + ssize_t written, + struct vmci_transport_send_notify_data *data) +{ + int err = 0; + struct vsock_sock *vsk; + bool sent_wrote = false; + int retries = 0; + + vsk = vsock_sk(sk); + +#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY) + /* Detect a wrap-around to maintain queue generation. Note that this + * is safe since we hold the socket lock across the two queue pair + * operations. + */ + if (written >= vmci_trans(vsk)->produce_size - data->produce_tail) + PKT_FIELD(vsk, produce_q_generation)++; + +#endif + + if (vmci_transport_notify_waiting_read(vsk)) { + /* Notify the peer that we have written, retrying the send on + * failure up to our maximum value. See the XXX comment for the + * corresponding piece of code in StreamRecvmsg() for potential + * improvements. + */ + while (!(vsk->peer_shutdown & RCV_SHUTDOWN) && + !sent_wrote && + retries < VMCI_TRANSPORT_MAX_DGRAM_RESENDS) { + err = vmci_transport_send_wrote(sk); + if (err >= 0) + sent_wrote = true; + + retries++; + } + + if (retries >= VMCI_TRANSPORT_MAX_DGRAM_RESENDS) { + pr_err("%p unable to send wrote notify to peer\n", sk); + return err; + } else { +#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY) + PKT_FIELD(vsk, peer_waiting_read) = false; +#endif + } + } + return err; +} + +static void +vmci_transport_notify_pkt_handle_pkt( + struct sock *sk, + struct vmci_transport_packet *pkt, + bool bottom_half, + struct sockaddr_vm *dst, + struct sockaddr_vm *src, bool *pkt_processed) +{ + bool processed = false; + + switch (pkt->type) { + case VMCI_TRANSPORT_PACKET_TYPE_WROTE: + vmci_transport_handle_wrote(sk, pkt, bottom_half, dst, src); + processed = true; + break; + case VMCI_TRANSPORT_PACKET_TYPE_READ: + vmci_transport_handle_read(sk, pkt, bottom_half, dst, src); + processed = true; + break; + case VMCI_TRANSPORT_PACKET_TYPE_WAITING_WRITE: + vmci_transport_handle_waiting_write(sk, pkt, bottom_half, + dst, src); + processed = true; + break; + + case VMCI_TRANSPORT_PACKET_TYPE_WAITING_READ: + vmci_transport_handle_waiting_read(sk, pkt, bottom_half, + dst, src); + processed = true; + break; + } + + if (pkt_processed) + *pkt_processed = processed; +} + +static void vmci_transport_notify_pkt_process_request(struct sock *sk) +{ + struct vsock_sock *vsk = vsock_sk(sk); + + PKT_FIELD(vsk, write_notify_window) = vmci_trans(vsk)->consume_size; + if (vmci_trans(vsk)->consume_size < + PKT_FIELD(vsk, write_notify_min_window)) + PKT_FIELD(vsk, write_notify_min_window) = + vmci_trans(vsk)->consume_size; +} + +static void vmci_transport_notify_pkt_process_negotiate(struct sock *sk) +{ + struct vsock_sock *vsk = vsock_sk(sk); + + PKT_FIELD(vsk, write_notify_window) = vmci_trans(vsk)->consume_size; + if (vmci_trans(vsk)->consume_size < + PKT_FIELD(vsk, write_notify_min_window)) + PKT_FIELD(vsk, write_notify_min_window) = + vmci_trans(vsk)->consume_size; +} + +/* Socket control packet based operations. */ +struct vmci_transport_notify_ops vmci_transport_notify_pkt_ops = { + vmci_transport_notify_pkt_socket_init, + vmci_transport_notify_pkt_socket_destruct, + vmci_transport_notify_pkt_poll_in, + vmci_transport_notify_pkt_poll_out, + vmci_transport_notify_pkt_handle_pkt, + vmci_transport_notify_pkt_recv_init, + vmci_transport_notify_pkt_recv_pre_block, + vmci_transport_notify_pkt_recv_pre_dequeue, + vmci_transport_notify_pkt_recv_post_dequeue, + vmci_transport_notify_pkt_send_init, + vmci_transport_notify_pkt_send_pre_block, + vmci_transport_notify_pkt_send_pre_enqueue, + vmci_transport_notify_pkt_send_post_enqueue, + vmci_transport_notify_pkt_process_request, + vmci_transport_notify_pkt_process_negotiate, +}; diff --git a/net/vmw_vsock/vmci_transport_notify.h b/net/vmw_vsock/vmci_transport_notify.h new file mode 100644 index 000000000000..7df793249b6c --- /dev/null +++ b/net/vmw_vsock/vmci_transport_notify.h @@ -0,0 +1,83 @@ +/* + * VMware vSockets Driver + * + * Copyright (C) 2009-2013 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#ifndef __VMCI_TRANSPORT_NOTIFY_H__ +#define __VMCI_TRANSPORT_NOTIFY_H__ + +#include +#include +#include +#include + +#include "vmci_transport.h" + +/* Comment this out to compare with old protocol. */ +#define VSOCK_OPTIMIZATION_WAITING_NOTIFY 1 +#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY) +/* Comment this out to remove flow control for "new" protocol */ +#define VSOCK_OPTIMIZATION_FLOW_CONTROL 1 +#endif + +#define VMCI_TRANSPORT_MAX_DGRAM_RESENDS 10 + +struct vmci_transport_recv_notify_data { + u64 consume_head; + u64 produce_tail; + bool notify_on_block; +}; + +struct vmci_transport_send_notify_data { + u64 consume_head; + u64 produce_tail; +}; + +/* Socket notification callbacks. */ +struct vmci_transport_notify_ops { + void (*socket_init) (struct sock *sk); + void (*socket_destruct) (struct vsock_sock *vsk); + int (*poll_in) (struct sock *sk, size_t target, + bool *data_ready_now); + int (*poll_out) (struct sock *sk, size_t target, + bool *space_avail_now); + void (*handle_notify_pkt) (struct sock *sk, + struct vmci_transport_packet *pkt, + bool bottom_half, struct sockaddr_vm *dst, + struct sockaddr_vm *src, + bool *pkt_processed); + int (*recv_init) (struct sock *sk, size_t target, + struct vmci_transport_recv_notify_data *data); + int (*recv_pre_block) (struct sock *sk, size_t target, + struct vmci_transport_recv_notify_data *data); + int (*recv_pre_dequeue) (struct sock *sk, size_t target, + struct vmci_transport_recv_notify_data *data); + int (*recv_post_dequeue) (struct sock *sk, size_t target, + ssize_t copied, bool data_read, + struct vmci_transport_recv_notify_data *data); + int (*send_init) (struct sock *sk, + struct vmci_transport_send_notify_data *data); + int (*send_pre_block) (struct sock *sk, + struct vmci_transport_send_notify_data *data); + int (*send_pre_enqueue) (struct sock *sk, + struct vmci_transport_send_notify_data *data); + int (*send_post_enqueue) (struct sock *sk, ssize_t written, + struct vmci_transport_send_notify_data *data); + void (*process_request) (struct sock *sk); + void (*process_negotiate) (struct sock *sk); +}; + +extern struct vmci_transport_notify_ops vmci_transport_notify_pkt_ops; +extern struct vmci_transport_notify_ops vmci_transport_notify_pkt_q_state_ops; + +#endif /* __VMCI_TRANSPORT_NOTIFY_H__ */ diff --git a/net/vmw_vsock/vmci_transport_notify_qstate.c b/net/vmw_vsock/vmci_transport_notify_qstate.c new file mode 100644 index 000000000000..622bd7aa1016 --- /dev/null +++ b/net/vmw_vsock/vmci_transport_notify_qstate.c @@ -0,0 +1,438 @@ +/* + * VMware vSockets Driver + * + * Copyright (C) 2009-2013 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#include +#include +#include +#include + +#include "vmci_transport_notify.h" + +#define PKT_FIELD(vsk, field_name) \ + (vmci_trans(vsk)->notify.pkt_q_state.field_name) + +static bool vmci_transport_notify_waiting_write(struct vsock_sock *vsk) +{ + bool retval; + u64 notify_limit; + + if (!PKT_FIELD(vsk, peer_waiting_write)) + return false; + + /* When the sender blocks, we take that as a sign that the sender is + * faster than the receiver. To reduce the transmit rate of the sender, + * we delay the sending of the read notification by decreasing the + * write_notify_window. The notification is delayed until the number of + * bytes used in the queue drops below the write_notify_window. + */ + + if (!PKT_FIELD(vsk, peer_waiting_write_detected)) { + PKT_FIELD(vsk, peer_waiting_write_detected) = true; + if (PKT_FIELD(vsk, write_notify_window) < PAGE_SIZE) { + PKT_FIELD(vsk, write_notify_window) = + PKT_FIELD(vsk, write_notify_min_window); + } else { + PKT_FIELD(vsk, write_notify_window) -= PAGE_SIZE; + if (PKT_FIELD(vsk, write_notify_window) < + PKT_FIELD(vsk, write_notify_min_window)) + PKT_FIELD(vsk, write_notify_window) = + PKT_FIELD(vsk, write_notify_min_window); + + } + } + notify_limit = vmci_trans(vsk)->consume_size - + PKT_FIELD(vsk, write_notify_window); + + /* The notify_limit is used to delay notifications in the case where + * flow control is enabled. Below the test is expressed in terms of + * free space in the queue: if free_space > ConsumeSize - + * write_notify_window then notify An alternate way of expressing this + * is to rewrite the expression to use the data ready in the receive + * queue: if write_notify_window > bufferReady then notify as + * free_space == ConsumeSize - bufferReady. + */ + + retval = vmci_qpair_consume_free_space(vmci_trans(vsk)->qpair) > + notify_limit; + + if (retval) { + /* Once we notify the peer, we reset the detected flag so the + * next wait will again cause a decrease in the window size. + */ + + PKT_FIELD(vsk, peer_waiting_write_detected) = false; + } + return retval; +} + +static void +vmci_transport_handle_read(struct sock *sk, + struct vmci_transport_packet *pkt, + bool bottom_half, + struct sockaddr_vm *dst, struct sockaddr_vm *src) +{ + sk->sk_write_space(sk); +} + +static void +vmci_transport_handle_wrote(struct sock *sk, + struct vmci_transport_packet *pkt, + bool bottom_half, + struct sockaddr_vm *dst, struct sockaddr_vm *src) +{ + sk->sk_data_ready(sk, 0); +} + +static void vsock_block_update_write_window(struct sock *sk) +{ + struct vsock_sock *vsk = vsock_sk(sk); + + if (PKT_FIELD(vsk, write_notify_window) < vmci_trans(vsk)->consume_size) + PKT_FIELD(vsk, write_notify_window) = + min(PKT_FIELD(vsk, write_notify_window) + PAGE_SIZE, + vmci_trans(vsk)->consume_size); +} + +static int vmci_transport_send_read_notification(struct sock *sk) +{ + struct vsock_sock *vsk; + bool sent_read; + unsigned int retries; + int err; + + vsk = vsock_sk(sk); + sent_read = false; + retries = 0; + err = 0; + + if (vmci_transport_notify_waiting_write(vsk)) { + /* Notify the peer that we have read, retrying the send on + * failure up to our maximum value. XXX For now we just log + * the failure, but later we should schedule a work item to + * handle the resend until it succeeds. That would require + * keeping track of work items in the vsk and cleaning them up + * upon socket close. + */ + while (!(vsk->peer_shutdown & RCV_SHUTDOWN) && + !sent_read && + retries < VMCI_TRANSPORT_MAX_DGRAM_RESENDS) { + err = vmci_transport_send_read(sk); + if (err >= 0) + sent_read = true; + + retries++; + } + + if (retries >= VMCI_TRANSPORT_MAX_DGRAM_RESENDS && !sent_read) + pr_err("%p unable to send read notification to peer\n", + sk); + else + PKT_FIELD(vsk, peer_waiting_write) = false; + + } + return err; +} + +static void vmci_transport_notify_pkt_socket_init(struct sock *sk) +{ + struct vsock_sock *vsk = vsock_sk(sk); + + PKT_FIELD(vsk, write_notify_window) = PAGE_SIZE; + PKT_FIELD(vsk, write_notify_min_window) = PAGE_SIZE; + PKT_FIELD(vsk, peer_waiting_write) = false; + PKT_FIELD(vsk, peer_waiting_write_detected) = false; +} + +static void vmci_transport_notify_pkt_socket_destruct(struct vsock_sock *vsk) +{ + PKT_FIELD(vsk, write_notify_window) = PAGE_SIZE; + PKT_FIELD(vsk, write_notify_min_window) = PAGE_SIZE; + PKT_FIELD(vsk, peer_waiting_write) = false; + PKT_FIELD(vsk, peer_waiting_write_detected) = false; +} + +static int +vmci_transport_notify_pkt_poll_in(struct sock *sk, + size_t target, bool *data_ready_now) +{ + struct vsock_sock *vsk = vsock_sk(sk); + + if (vsock_stream_has_data(vsk)) { + *data_ready_now = true; + } else { + /* We can't read right now because there is nothing in the + * queue. Ask for notifications when there is something to + * read. + */ + if (sk->sk_state == SS_CONNECTED) + vsock_block_update_write_window(sk); + *data_ready_now = false; + } + + return 0; +} + +static int +vmci_transport_notify_pkt_poll_out(struct sock *sk, + size_t target, bool *space_avail_now) +{ + s64 produce_q_free_space; + struct vsock_sock *vsk = vsock_sk(sk); + + produce_q_free_space = vsock_stream_has_space(vsk); + if (produce_q_free_space > 0) { + *space_avail_now = true; + return 0; + } else if (produce_q_free_space == 0) { + /* This is a connected socket but we can't currently send data. + * Nothing else to do. + */ + *space_avail_now = false; + } + + return 0; +} + +static int +vmci_transport_notify_pkt_recv_init( + struct sock *sk, + size_t target, + struct vmci_transport_recv_notify_data *data) +{ + struct vsock_sock *vsk = vsock_sk(sk); + + data->consume_head = 0; + data->produce_tail = 0; + data->notify_on_block = false; + + if (PKT_FIELD(vsk, write_notify_min_window) < target + 1) { + PKT_FIELD(vsk, write_notify_min_window) = target + 1; + if (PKT_FIELD(vsk, write_notify_window) < + PKT_FIELD(vsk, write_notify_min_window)) { + /* If the current window is smaller than the new + * minimal window size, we need to reevaluate whether + * we need to notify the sender. If the number of ready + * bytes are smaller than the new window, we need to + * send a notification to the sender before we block. + */ + + PKT_FIELD(vsk, write_notify_window) = + PKT_FIELD(vsk, write_notify_min_window); + data->notify_on_block = true; + } + } + + return 0; +} + +static int +vmci_transport_notify_pkt_recv_pre_block( + struct sock *sk, + size_t target, + struct vmci_transport_recv_notify_data *data) +{ + int err = 0; + + vsock_block_update_write_window(sk); + + if (data->notify_on_block) { + err = vmci_transport_send_read_notification(sk); + if (err < 0) + return err; + data->notify_on_block = false; + } + + return err; +} + +static int +vmci_transport_notify_pkt_recv_post_dequeue( + struct sock *sk, + size_t target, + ssize_t copied, + bool data_read, + struct vmci_transport_recv_notify_data *data) +{ + struct vsock_sock *vsk; + int err; + bool was_full = false; + u64 free_space; + + vsk = vsock_sk(sk); + err = 0; + + if (data_read) { + smp_mb(); + + free_space = + vmci_qpair_consume_free_space(vmci_trans(vsk)->qpair); + was_full = free_space == copied; + + if (was_full) + PKT_FIELD(vsk, peer_waiting_write) = true; + + err = vmci_transport_send_read_notification(sk); + if (err < 0) + return err; + + /* See the comment in + * vmci_transport_notify_pkt_send_post_enqueue(). + */ + sk->sk_data_ready(sk, 0); + } + + return err; +} + +static int +vmci_transport_notify_pkt_send_init( + struct sock *sk, + struct vmci_transport_send_notify_data *data) +{ + data->consume_head = 0; + data->produce_tail = 0; + + return 0; +} + +static int +vmci_transport_notify_pkt_send_post_enqueue( + struct sock *sk, + ssize_t written, + struct vmci_transport_send_notify_data *data) +{ + int err = 0; + struct vsock_sock *vsk; + bool sent_wrote = false; + bool was_empty; + int retries = 0; + + vsk = vsock_sk(sk); + + smp_mb(); + + was_empty = + vmci_qpair_produce_buf_ready(vmci_trans(vsk)->qpair) == written; + if (was_empty) { + while (!(vsk->peer_shutdown & RCV_SHUTDOWN) && + !sent_wrote && + retries < VMCI_TRANSPORT_MAX_DGRAM_RESENDS) { + err = vmci_transport_send_wrote(sk); + if (err >= 0) + sent_wrote = true; + + retries++; + } + } + + if (retries >= VMCI_TRANSPORT_MAX_DGRAM_RESENDS && !sent_wrote) { + pr_err("%p unable to send wrote notification to peer\n", + sk); + return err; + } + + return err; +} + +static void +vmci_transport_notify_pkt_handle_pkt( + struct sock *sk, + struct vmci_transport_packet *pkt, + bool bottom_half, + struct sockaddr_vm *dst, + struct sockaddr_vm *src, bool *pkt_processed) +{ + bool processed = false; + + switch (pkt->type) { + case VMCI_TRANSPORT_PACKET_TYPE_WROTE: + vmci_transport_handle_wrote(sk, pkt, bottom_half, dst, src); + processed = true; + break; + case VMCI_TRANSPORT_PACKET_TYPE_READ: + vmci_transport_handle_read(sk, pkt, bottom_half, dst, src); + processed = true; + break; + } + + if (pkt_processed) + *pkt_processed = processed; +} + +static void vmci_transport_notify_pkt_process_request(struct sock *sk) +{ + struct vsock_sock *vsk = vsock_sk(sk); + + PKT_FIELD(vsk, write_notify_window) = vmci_trans(vsk)->consume_size; + if (vmci_trans(vsk)->consume_size < + PKT_FIELD(vsk, write_notify_min_window)) + PKT_FIELD(vsk, write_notify_min_window) = + vmci_trans(vsk)->consume_size; +} + +static void vmci_transport_notify_pkt_process_negotiate(struct sock *sk) +{ + struct vsock_sock *vsk = vsock_sk(sk); + + PKT_FIELD(vsk, write_notify_window) = vmci_trans(vsk)->consume_size; + if (vmci_trans(vsk)->consume_size < + PKT_FIELD(vsk, write_notify_min_window)) + PKT_FIELD(vsk, write_notify_min_window) = + vmci_trans(vsk)->consume_size; +} + +static int +vmci_transport_notify_pkt_recv_pre_dequeue( + struct sock *sk, + size_t target, + struct vmci_transport_recv_notify_data *data) +{ + return 0; /* NOP for QState. */ +} + +static int +vmci_transport_notify_pkt_send_pre_block( + struct sock *sk, + struct vmci_transport_send_notify_data *data) +{ + return 0; /* NOP for QState. */ +} + +static int +vmci_transport_notify_pkt_send_pre_enqueue( + struct sock *sk, + struct vmci_transport_send_notify_data *data) +{ + return 0; /* NOP for QState. */ +} + +/* Socket always on control packet based operations. */ +struct vmci_transport_notify_ops vmci_transport_notify_pkt_q_state_ops = { + vmci_transport_notify_pkt_socket_init, + vmci_transport_notify_pkt_socket_destruct, + vmci_transport_notify_pkt_poll_in, + vmci_transport_notify_pkt_poll_out, + vmci_transport_notify_pkt_handle_pkt, + vmci_transport_notify_pkt_recv_init, + vmci_transport_notify_pkt_recv_pre_block, + vmci_transport_notify_pkt_recv_pre_dequeue, + vmci_transport_notify_pkt_recv_post_dequeue, + vmci_transport_notify_pkt_send_init, + vmci_transport_notify_pkt_send_pre_block, + vmci_transport_notify_pkt_send_pre_enqueue, + vmci_transport_notify_pkt_send_post_enqueue, + vmci_transport_notify_pkt_process_request, + vmci_transport_notify_pkt_process_negotiate, +}; diff --git a/net/vmw_vsock/vsock_addr.c b/net/vmw_vsock/vsock_addr.c new file mode 100644 index 000000000000..b7df1aea7c59 --- /dev/null +++ b/net/vmw_vsock/vsock_addr.c @@ -0,0 +1,86 @@ +/* + * VMware vSockets Driver + * + * Copyright (C) 2007-2012 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#include +#include +#include +#include + +#include "vsock_addr.h" + +void vsock_addr_init(struct sockaddr_vm *addr, u32 cid, u32 port) +{ + memset(addr, 0, sizeof(*addr)); + addr->svm_family = AF_VSOCK; + addr->svm_cid = cid; + addr->svm_port = port; +} +EXPORT_SYMBOL_GPL(vsock_addr_init); + +int vsock_addr_validate(const struct sockaddr_vm *addr) +{ + if (!addr) + return -EFAULT; + + if (addr->svm_family != AF_VSOCK) + return -EAFNOSUPPORT; + + if (addr->svm_zero[0] != 0) + return -EINVAL; + + return 0; +} +EXPORT_SYMBOL_GPL(vsock_addr_validate); + +bool vsock_addr_bound(const struct sockaddr_vm *addr) +{ + return addr->svm_port != VMADDR_PORT_ANY; +} +EXPORT_SYMBOL_GPL(vsock_addr_bound); + +void vsock_addr_unbind(struct sockaddr_vm *addr) +{ + vsock_addr_init(addr, VMADDR_CID_ANY, VMADDR_PORT_ANY); +} +EXPORT_SYMBOL_GPL(vsock_addr_unbind); + +bool vsock_addr_equals_addr(const struct sockaddr_vm *addr, + const struct sockaddr_vm *other) +{ + return addr->svm_cid == other->svm_cid && + addr->svm_port == other->svm_port; +} +EXPORT_SYMBOL_GPL(vsock_addr_equals_addr); + +bool vsock_addr_equals_addr_any(const struct sockaddr_vm *addr, + const struct sockaddr_vm *other) +{ + return (addr->svm_cid == VMADDR_CID_ANY || + other->svm_cid == VMADDR_CID_ANY || + addr->svm_cid == other->svm_cid) && + addr->svm_port == other->svm_port; +} +EXPORT_SYMBOL_GPL(vsock_addr_equals_addr_any); + +int vsock_addr_cast(const struct sockaddr *addr, + size_t len, struct sockaddr_vm **out_addr) +{ + if (len < sizeof(**out_addr)) + return -EFAULT; + + *out_addr = (struct sockaddr_vm *)addr; + return vsock_addr_validate(*out_addr); +} +EXPORT_SYMBOL_GPL(vsock_addr_cast); diff --git a/net/vmw_vsock/vsock_addr.h b/net/vmw_vsock/vsock_addr.h new file mode 100644 index 000000000000..cdfbcefdf843 --- /dev/null +++ b/net/vmw_vsock/vsock_addr.h @@ -0,0 +1,32 @@ +/* + * VMware vSockets Driver + * + * Copyright (C) 2007-2013 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#ifndef _VSOCK_ADDR_H_ +#define _VSOCK_ADDR_H_ + +#include + +void vsock_addr_init(struct sockaddr_vm *addr, u32 cid, u32 port); +int vsock_addr_validate(const struct sockaddr_vm *addr); +bool vsock_addr_bound(const struct sockaddr_vm *addr); +void vsock_addr_unbind(struct sockaddr_vm *addr); +bool vsock_addr_equals_addr(const struct sockaddr_vm *addr, + const struct sockaddr_vm *other); +bool vsock_addr_equals_addr_any(const struct sockaddr_vm *addr, + const struct sockaddr_vm *other); +int vsock_addr_cast(const struct sockaddr *addr, size_t len, + struct sockaddr_vm **out_addr); + +#endif diff --git a/net/vmw_vsock/vsock_version.h b/net/vmw_vsock/vsock_version.h new file mode 100644 index 000000000000..4df7f5e2151c --- /dev/null +++ b/net/vmw_vsock/vsock_version.h @@ -0,0 +1,22 @@ +/* + * VMware vSockets Driver + * + * Copyright (C) 2011-2012 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#ifndef _VSOCK_VERSION_H_ +#define _VSOCK_VERSION_H_ + +#define VSOCK_DRIVER_VERSION_PARTS { 1, 0, 0, 0 } +#define VSOCK_DRIVER_VERSION_STRING "1.0.0.0-k" + +#endif /* _VSOCK_VERSION_H_ */ -- cgit v1.2.3 From 86fbe9bb599fcaf7e92e38dbfdad0414a2d68f7d Mon Sep 17 00:00:00 2001 From: David Ward Date: Fri, 8 Feb 2013 17:17:07 +0000 Subject: net/8021q: Implement Multiple VLAN Registration Protocol (MVRP) Initial implementation of the Multiple VLAN Registration Protocol (MVRP) from IEEE 802.1Q-2011, based on the existing implementation of the GARP VLAN Registration Protocol (GVRP). Signed-off-by: David Ward Acked-by: Patrick McHardy Signed-off-by: David S. Miller --- include/uapi/linux/if_ether.h | 1 + include/uapi/linux/if_vlan.h | 1 + net/8021q/Kconfig | 11 +++++++ net/8021q/Makefile | 1 + net/8021q/vlan.c | 27 +++++++++++++--- net/8021q/vlan.h | 16 ++++++++++ net/8021q/vlan_dev.c | 12 +++++++- net/8021q/vlan_mvrp.c | 72 +++++++++++++++++++++++++++++++++++++++++++ net/8021q/vlan_netlink.c | 2 +- 9 files changed, 136 insertions(+), 7 deletions(-) create mode 100644 net/8021q/vlan_mvrp.c (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_ether.h b/include/uapi/linux/if_ether.h index 67fb87ca1094..798032d01112 100644 --- a/include/uapi/linux/if_ether.h +++ b/include/uapi/linux/if_ether.h @@ -83,6 +83,7 @@ #define ETH_P_802_EX1 0x88B5 /* 802.1 Local Experimental 1. */ #define ETH_P_TIPC 0x88CA /* TIPC */ #define ETH_P_8021AH 0x88E7 /* 802.1ah Backbone Service Tag */ +#define ETH_P_MVRP 0x88F5 /* 802.1Q MVRP */ #define ETH_P_1588 0x88F7 /* IEEE 1588 Timesync */ #define ETH_P_FCOE 0x8906 /* Fibre Channel over Ethernet */ #define ETH_P_TDLS 0x890D /* TDLS */ diff --git a/include/uapi/linux/if_vlan.h b/include/uapi/linux/if_vlan.h index 0744f8e65d15..7e5e6b397332 100644 --- a/include/uapi/linux/if_vlan.h +++ b/include/uapi/linux/if_vlan.h @@ -34,6 +34,7 @@ enum vlan_flags { VLAN_FLAG_REORDER_HDR = 0x1, VLAN_FLAG_GVRP = 0x2, VLAN_FLAG_LOOSE_BINDING = 0x4, + VLAN_FLAG_MVRP = 0x8, }; enum vlan_name_types { diff --git a/net/8021q/Kconfig b/net/8021q/Kconfig index fa073a54963e..8f7517df41a5 100644 --- a/net/8021q/Kconfig +++ b/net/8021q/Kconfig @@ -27,3 +27,14 @@ config VLAN_8021Q_GVRP automatic propagation of registered VLANs to switches. If unsure, say N. + +config VLAN_8021Q_MVRP + bool "MVRP (Multiple VLAN Registration Protocol) support" + depends on VLAN_8021Q + select MRP + help + Select this to enable MVRP end-system support. MVRP is used for + automatic propagation of registered VLANs to switches; it + supersedes GVRP and is not backwards-compatible. + + If unsure, say N. diff --git a/net/8021q/Makefile b/net/8021q/Makefile index 9f4f174ead1c..7bc8db08d7ef 100644 --- a/net/8021q/Makefile +++ b/net/8021q/Makefile @@ -6,5 +6,6 @@ obj-$(CONFIG_VLAN_8021Q) += 8021q.o 8021q-y := vlan.o vlan_dev.o vlan_netlink.o 8021q-$(CONFIG_VLAN_8021Q_GVRP) += vlan_gvrp.o +8021q-$(CONFIG_VLAN_8021Q_MVRP) += vlan_mvrp.o 8021q-$(CONFIG_PROC_FS) += vlanproc.o diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index addc578d5443..a18714469bf7 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -95,6 +95,8 @@ void unregister_vlan_dev(struct net_device *dev, struct list_head *head) grp->nr_vlan_devs--; + if (vlan->flags & VLAN_FLAG_MVRP) + vlan_mvrp_request_leave(dev); if (vlan->flags & VLAN_FLAG_GVRP) vlan_gvrp_request_leave(dev); @@ -107,8 +109,10 @@ void unregister_vlan_dev(struct net_device *dev, struct list_head *head) netdev_upper_dev_unlink(real_dev, dev); - if (grp->nr_vlan_devs == 0) + if (grp->nr_vlan_devs == 0) { + vlan_mvrp_uninit_applicant(real_dev); vlan_gvrp_uninit_applicant(real_dev); + } /* Get rid of the vlan's reference to real_dev */ dev_put(real_dev); @@ -151,15 +155,18 @@ int register_vlan_dev(struct net_device *dev) err = vlan_gvrp_init_applicant(real_dev); if (err < 0) goto out_vid_del; + err = vlan_mvrp_init_applicant(real_dev); + if (err < 0) + goto out_uninit_gvrp; } err = vlan_group_prealloc_vid(grp, vlan_id); if (err < 0) - goto out_uninit_applicant; + goto out_uninit_mvrp; err = netdev_upper_dev_link(real_dev, dev); if (err) - goto out_uninit_applicant; + goto out_uninit_mvrp; err = register_netdevice(dev); if (err < 0) @@ -181,7 +188,10 @@ int register_vlan_dev(struct net_device *dev) out_upper_dev_unlink: netdev_upper_dev_unlink(real_dev, dev); -out_uninit_applicant: +out_uninit_mvrp: + if (grp->nr_vlan_devs == 0) + vlan_mvrp_uninit_applicant(real_dev); +out_uninit_gvrp: if (grp->nr_vlan_devs == 0) vlan_gvrp_uninit_applicant(real_dev); out_vid_del: @@ -655,13 +665,19 @@ static int __init vlan_proto_init(void) if (err < 0) goto err3; - err = vlan_netlink_init(); + err = vlan_mvrp_init(); if (err < 0) goto err4; + err = vlan_netlink_init(); + if (err < 0) + goto err5; + vlan_ioctl_set(vlan_ioctl_handler); return 0; +err5: + vlan_mvrp_uninit(); err4: vlan_gvrp_uninit(); err3: @@ -682,6 +698,7 @@ static void __exit vlan_cleanup_module(void) unregister_pernet_subsys(&vlan_net_ops); rcu_barrier(); /* Wait for completion of call_rcu()'s */ + vlan_mvrp_uninit(); vlan_gvrp_uninit(); } diff --git a/net/8021q/vlan.h b/net/8021q/vlan.h index a4886d94c40c..670f1e8cfc0f 100644 --- a/net/8021q/vlan.h +++ b/net/8021q/vlan.h @@ -171,6 +171,22 @@ static inline int vlan_gvrp_init(void) { return 0; } static inline void vlan_gvrp_uninit(void) {} #endif +#ifdef CONFIG_VLAN_8021Q_MVRP +extern int vlan_mvrp_request_join(const struct net_device *dev); +extern void vlan_mvrp_request_leave(const struct net_device *dev); +extern int vlan_mvrp_init_applicant(struct net_device *dev); +extern void vlan_mvrp_uninit_applicant(struct net_device *dev); +extern int vlan_mvrp_init(void); +extern void vlan_mvrp_uninit(void); +#else +static inline int vlan_mvrp_request_join(const struct net_device *dev) { return 0; } +static inline void vlan_mvrp_request_leave(const struct net_device *dev) {} +static inline int vlan_mvrp_init_applicant(struct net_device *dev) { return 0; } +static inline void vlan_mvrp_uninit_applicant(struct net_device *dev) {} +static inline int vlan_mvrp_init(void) { return 0; } +static inline void vlan_mvrp_uninit(void) {} +#endif + extern const char vlan_fullname[]; extern const char vlan_version[]; extern int vlan_netlink_init(void); diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 09f9108d4688..34df5b3c9b75 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -261,7 +261,7 @@ int vlan_dev_change_flags(const struct net_device *dev, u32 flags, u32 mask) u32 old_flags = vlan->flags; if (mask & ~(VLAN_FLAG_REORDER_HDR | VLAN_FLAG_GVRP | - VLAN_FLAG_LOOSE_BINDING)) + VLAN_FLAG_LOOSE_BINDING | VLAN_FLAG_MVRP)) return -EINVAL; vlan->flags = (old_flags & ~mask) | (flags & mask); @@ -272,6 +272,13 @@ int vlan_dev_change_flags(const struct net_device *dev, u32 flags, u32 mask) else vlan_gvrp_request_leave(dev); } + + if (netif_running(dev) && (vlan->flags ^ old_flags) & VLAN_FLAG_MVRP) { + if (vlan->flags & VLAN_FLAG_MVRP) + vlan_mvrp_request_join(dev); + else + vlan_mvrp_request_leave(dev); + } return 0; } @@ -312,6 +319,9 @@ static int vlan_dev_open(struct net_device *dev) if (vlan->flags & VLAN_FLAG_GVRP) vlan_gvrp_request_join(dev); + if (vlan->flags & VLAN_FLAG_MVRP) + vlan_mvrp_request_join(dev); + if (netif_carrier_ok(real_dev)) netif_carrier_on(dev); return 0; diff --git a/net/8021q/vlan_mvrp.c b/net/8021q/vlan_mvrp.c new file mode 100644 index 000000000000..d9ec1d5964aa --- /dev/null +++ b/net/8021q/vlan_mvrp.c @@ -0,0 +1,72 @@ +/* + * IEEE 802.1Q Multiple VLAN Registration Protocol (MVRP) + * + * Copyright (c) 2012 Massachusetts Institute of Technology + * + * Adapted from code in net/8021q/vlan_gvrp.c + * Copyright (c) 2008 Patrick McHardy + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2 as published by the Free Software Foundation. + */ +#include +#include +#include +#include +#include "vlan.h" + +#define MRP_MVRP_ADDRESS { 0x01, 0x80, 0xc2, 0x00, 0x00, 0x21 } + +enum mvrp_attributes { + MVRP_ATTR_INVALID, + MVRP_ATTR_VID, + __MVRP_ATTR_MAX +}; +#define MVRP_ATTR_MAX (__MVRP_ATTR_MAX - 1) + +static struct mrp_application vlan_mrp_app __read_mostly = { + .type = MRP_APPLICATION_MVRP, + .maxattr = MVRP_ATTR_MAX, + .pkttype.type = htons(ETH_P_MVRP), + .group_address = MRP_MVRP_ADDRESS, + .version = 0, +}; + +int vlan_mvrp_request_join(const struct net_device *dev) +{ + const struct vlan_dev_priv *vlan = vlan_dev_priv(dev); + __be16 vlan_id = htons(vlan->vlan_id); + + return mrp_request_join(vlan->real_dev, &vlan_mrp_app, + &vlan_id, sizeof(vlan_id), MVRP_ATTR_VID); +} + +void vlan_mvrp_request_leave(const struct net_device *dev) +{ + const struct vlan_dev_priv *vlan = vlan_dev_priv(dev); + __be16 vlan_id = htons(vlan->vlan_id); + + mrp_request_leave(vlan->real_dev, &vlan_mrp_app, + &vlan_id, sizeof(vlan_id), MVRP_ATTR_VID); +} + +int vlan_mvrp_init_applicant(struct net_device *dev) +{ + return mrp_init_applicant(dev, &vlan_mrp_app); +} + +void vlan_mvrp_uninit_applicant(struct net_device *dev) +{ + mrp_uninit_applicant(dev, &vlan_mrp_app); +} + +int __init vlan_mvrp_init(void) +{ + return mrp_register_application(&vlan_mrp_app); +} + +void vlan_mvrp_uninit(void) +{ + mrp_unregister_application(&vlan_mrp_app); +} diff --git a/net/8021q/vlan_netlink.c b/net/8021q/vlan_netlink.c index 708c80ea1874..1789658b7cd7 100644 --- a/net/8021q/vlan_netlink.c +++ b/net/8021q/vlan_netlink.c @@ -62,7 +62,7 @@ static int vlan_validate(struct nlattr *tb[], struct nlattr *data[]) flags = nla_data(data[IFLA_VLAN_FLAGS]); if ((flags->flags & flags->mask) & ~(VLAN_FLAG_REORDER_HDR | VLAN_FLAG_GVRP | - VLAN_FLAG_LOOSE_BINDING)) + VLAN_FLAG_LOOSE_BINDING | VLAN_FLAG_MVRP)) return -EINVAL; } -- cgit v1.2.3 From 3401d54696f992edf036f00f46c8c399d1b75c2a Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Wed, 23 Jan 2013 13:18:04 -0500 Subject: KVM: ARM: Introduce KVM_ARM_SET_DEVICE_ADDR ioctl On ARM some bits are specific to the model being emulated for the guest and user space needs a way to tell the kernel about those bits. An example is mmio device base addresses, where KVM must know the base address for a given device to properly emulate mmio accesses within a certain address range or directly map a device with virtualiation extensions into the guest address space. We make this API ARM-specific as we haven't yet reached a consensus for a generic API for all KVM architectures that will allow us to do something like this. Reviewed-by: Will Deacon Signed-off-by: Christoffer Dall Signed-off-by: Marc Zyngier --- Documentation/virtual/kvm/api.txt | 37 +++++++++++++++++++++++++++++++++++++ arch/arm/include/uapi/asm/kvm.h | 13 +++++++++++++ arch/arm/kvm/arm.c | 23 ++++++++++++++++++++++- include/uapi/linux/kvm.h | 8 ++++++++ 4 files changed, 80 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index c25439a58274..4505f869e450 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -2210,6 +2210,43 @@ This ioctl returns the guest registers that are supported for the KVM_GET_ONE_REG/KVM_SET_ONE_REG calls. +4.80 KVM_ARM_SET_DEVICE_ADDR + +Capability: KVM_CAP_ARM_SET_DEVICE_ADDR +Architectures: arm +Type: vm ioctl +Parameters: struct kvm_arm_device_address (in) +Returns: 0 on success, -1 on error +Errors: + ENODEV: The device id is unknown + ENXIO: Device not supported on current system + EEXIST: Address already set + E2BIG: Address outside guest physical address space + +struct kvm_arm_device_addr { + __u64 id; + __u64 addr; +}; + +Specify a device address in the guest's physical address space where guests +can access emulated or directly exposed devices, which the host kernel needs +to know about. The id field is an architecture specific identifier for a +specific device. + +ARM divides the id field into two parts, a device id and an address type id +specific to the individual device. + +  bits: | 63 ... 32 | 31 ... 16 | 15 ... 0 | + field: | 0x00000000 | device id | addr type id | + +ARM currently only require this when using the in-kernel GIC support for the +hardware VGIC features, using KVM_ARM_DEVICE_VGIC_V2 as the device id. When +setting the base address for the guest's mapping of the VGIC virtual CPU +and distributor interface, the ioctl must be called after calling +KVM_CREATE_IRQCHIP, but before calling KVM_RUN on any of the VCPUs. Calling +this ioctl twice for any of the base addresses will return -EEXIST. + + 5. The kvm_run structure ------------------------ diff --git a/arch/arm/include/uapi/asm/kvm.h b/arch/arm/include/uapi/asm/kvm.h index 3303ff5adbf3..346ac3f4a2b8 100644 --- a/arch/arm/include/uapi/asm/kvm.h +++ b/arch/arm/include/uapi/asm/kvm.h @@ -65,6 +65,19 @@ struct kvm_regs { #define KVM_ARM_TARGET_CORTEX_A15 0 #define KVM_ARM_NUM_TARGETS 1 +/* KVM_ARM_SET_DEVICE_ADDR ioctl id encoding */ +#define KVM_ARM_DEVICE_TYPE_SHIFT 0 +#define KVM_ARM_DEVICE_TYPE_MASK (0xffff << KVM_ARM_DEVICE_TYPE_SHIFT) +#define KVM_ARM_DEVICE_ID_SHIFT 16 +#define KVM_ARM_DEVICE_ID_MASK (0xffff << KVM_ARM_DEVICE_ID_SHIFT) + +/* Supported device IDs */ +#define KVM_ARM_DEVICE_VGIC_V2 0 + +/* Supported VGIC address types */ +#define KVM_VGIC_V2_ADDR_TYPE_DIST 0 +#define KVM_VGIC_V2_ADDR_TYPE_CPU 1 + #define KVM_ARM_VCPU_POWER_OFF 0 /* CPU is started in OFF state */ struct kvm_vcpu_init { diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index 2d30e3afdaf9..523f77a44e44 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -167,6 +167,8 @@ int kvm_dev_ioctl_check_extension(long ext) case KVM_CAP_COALESCED_MMIO: r = KVM_COALESCED_MMIO_PAGE_OFFSET; break; + case KVM_CAP_ARM_SET_DEVICE_ADDR: + r = 1; case KVM_CAP_NR_VCPUS: r = num_online_cpus(); break; @@ -827,10 +829,29 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log) return -EINVAL; } +static int kvm_vm_ioctl_set_device_addr(struct kvm *kvm, + struct kvm_arm_device_addr *dev_addr) +{ + return -ENODEV; +} + long kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { - return -EINVAL; + struct kvm *kvm = filp->private_data; + void __user *argp = (void __user *)arg; + + switch (ioctl) { + case KVM_ARM_SET_DEVICE_ADDR: { + struct kvm_arm_device_addr dev_addr; + + if (copy_from_user(&dev_addr, argp, sizeof(dev_addr))) + return -EFAULT; + return kvm_vm_ioctl_set_device_addr(kvm, &dev_addr); + } + default: + return -EINVAL; + } } static void cpu_init_hyp_mode(void *vector) diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 7f2360a46fc2..c70577cf67bc 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -637,6 +637,7 @@ struct kvm_ppc_smmu_info { #define KVM_CAP_PPC_BOOKE_WATCHDOG 83 #define KVM_CAP_PPC_HTAB_FD 84 #define KVM_CAP_ARM_PSCI 87 +#define KVM_CAP_ARM_SET_DEVICE_ADDR 88 #ifdef KVM_CAP_IRQ_ROUTING @@ -784,6 +785,11 @@ struct kvm_msi { __u8 pad[16]; }; +struct kvm_arm_device_addr { + __u64 id; + __u64 addr; +}; + /* * ioctls for VM fds */ @@ -869,6 +875,8 @@ struct kvm_s390_ucas_mapping { #define KVM_ALLOCATE_RMA _IOR(KVMIO, 0xa9, struct kvm_allocate_rma) /* Available with KVM_CAP_PPC_HTAB_FD */ #define KVM_PPC_GET_HTAB_FD _IOW(KVMIO, 0xaa, struct kvm_get_htab_fd) +/* Available with KVM_CAP_ARM_SET_DEVICE_ADDR */ +#define KVM_ARM_SET_DEVICE_ADDR _IOW(KVMIO, 0xab, struct kvm_arm_device_addr) /* * ioctls for vcpu fds -- cgit v1.2.3 From b6a7bceb3b9315478657bc55884dfdcd104c9864 Mon Sep 17 00:00:00 2001 From: Amitkumar Karwar Date: Mon, 11 Feb 2013 23:56:40 -0800 Subject: nl80211: minor correction in sample wowlan mask calculation The match 00:xx:00:00:xx:00:00:00:00:xx:xx:xx (where xx indicates "don't care") should be represented by a pattern of twelve zero bytes, and a mask of "0xed,0x01", not "0xed,0x07". mask_len = (pat_len + 7) / 8 = (12 + 7) / 8 = 2 Hence the mask will be of 2 bytes. Replace each valid byte in pattern by 1 and don't care byte by 0: 10110111 1000 (0000) 1st byte of pattern corresponds to lower order bit in first byte of mask. And 9th byte of pattern corresponds to lower order bit in second byte of mask. With this logic the mask will be 11101101 00000001 = 0xed 0x01 Signed-off-by: Amitkumar Karwar Signed-off-by: Bing Zhao Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 9a2ecdc4136c..b23321154e8b 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -2906,7 +2906,7 @@ enum nl80211_tx_power_setting { * corresponds to the lowest-order bit in the second byte of the mask. * For example: The match 00:xx:00:00:xx:00:00:00:00:xx:xx:xx (where * xx indicates "don't care") would be represented by a pattern of - * twelve zero bytes, and a mask of "0xed,0x07". + * twelve zero bytes, and a mask of "0xed,0x01". * Note that the pattern matching is done as though frames were not * 802.11 frames but 802.3 frames, i.e. the frame is fully unpacked * first (including SNAP header unpacking) and then matched. -- cgit v1.2.3 From bb92d19983a4b54be3e3b83441a8076d92cd04bc Mon Sep 17 00:00:00 2001 From: Amitkumar Karwar Date: Tue, 12 Feb 2013 12:16:26 -0800 Subject: nl80211: add packet offset information for wowlan pattern If user knows the location of a wowlan pattern to be matched in Rx packet, he can provide an offset with the pattern. This will help drivers to ignore initial bytes and match the pattern efficiently. Signed-off-by: Amitkumar Karwar Signed-off-by: Bing Zhao [refactor pattern sending] Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 4 +++ include/uapi/linux/nl80211.h | 12 +++++-- net/wireless/nl80211.c | 74 ++++++++++++++++++++++++++++---------------- 3 files changed, 60 insertions(+), 30 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 77686ca28948..d3a73818e44c 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1576,6 +1576,7 @@ struct cfg80211_pmksa { * one bit per byte, in same format as nl80211 * @pattern: bytes to match where bitmask is 1 * @pattern_len: length of pattern (in bytes) + * @pkt_offset: packet offset (in bytes) * * Internal note: @mask and @pattern are allocated in one chunk of * memory, free @mask only! @@ -1583,6 +1584,7 @@ struct cfg80211_pmksa { struct cfg80211_wowlan_trig_pkt_pattern { u8 *mask, *pattern; int pattern_len; + int pkt_offset; }; /** @@ -2290,12 +2292,14 @@ enum wiphy_wowlan_support_flags { * (see nl80211.h for the pattern definition) * @pattern_max_len: maximum length of each pattern * @pattern_min_len: minimum length of each pattern + * @max_pkt_offset: maximum Rx packet offset */ struct wiphy_wowlan_support { u32 flags; int n_patterns; int pattern_max_len; int pattern_min_len; + int max_pkt_offset; }; /** diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index b23321154e8b..eb7b32247ec5 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -2910,6 +2910,8 @@ enum nl80211_tx_power_setting { * Note that the pattern matching is done as though frames were not * 802.11 frames but 802.3 frames, i.e. the frame is fully unpacked * first (including SNAP header unpacking) and then matched. + * @NL80211_WOWLAN_PKTPAT_OFFSET: packet offset, pattern is matched after + * these fixed number of bytes of received packet * @NUM_NL80211_WOWLAN_PKTPAT: number of attributes * @MAX_NL80211_WOWLAN_PKTPAT: max attribute number */ @@ -2917,6 +2919,7 @@ enum nl80211_wowlan_packet_pattern_attr { __NL80211_WOWLAN_PKTPAT_INVALID, NL80211_WOWLAN_PKTPAT_MASK, NL80211_WOWLAN_PKTPAT_PATTERN, + NL80211_WOWLAN_PKTPAT_OFFSET, NUM_NL80211_WOWLAN_PKTPAT, MAX_NL80211_WOWLAN_PKTPAT = NUM_NL80211_WOWLAN_PKTPAT - 1, @@ -2927,6 +2930,7 @@ enum nl80211_wowlan_packet_pattern_attr { * @max_patterns: maximum number of patterns supported * @min_pattern_len: minimum length of each pattern * @max_pattern_len: maximum length of each pattern + * @max_pkt_offset: maximum Rx packet offset * * This struct is carried in %NL80211_WOWLAN_TRIG_PKT_PATTERN when * that is part of %NL80211_ATTR_WOWLAN_TRIGGERS_SUPPORTED in the @@ -2936,6 +2940,7 @@ struct nl80211_wowlan_pattern_support { __u32 max_patterns; __u32 min_pattern_len; __u32 max_pattern_len; + __u32 max_pkt_offset; } __attribute__((packed)); /** @@ -2951,9 +2956,10 @@ struct nl80211_wowlan_pattern_support { * @NL80211_WOWLAN_TRIG_PKT_PATTERN: wake up on the specified packet patterns * which are passed in an array of nested attributes, each nested attribute * defining a with attributes from &struct nl80211_wowlan_trig_pkt_pattern. - * Each pattern defines a wakeup packet. The matching is done on the MSDU, - * i.e. as though the packet was an 802.3 packet, so the pattern matching - * is done after the packet is converted to the MSDU. + * Each pattern defines a wakeup packet. Packet offset is associated with + * each pattern which is used while matching the pattern. The matching is + * done on the MSDU, i.e. as though the packet was an 802.3 packet, so the + * pattern matching is done after the packet is converted to the MSDU. * * In %NL80211_ATTR_WOWLAN_TRIGGERS_SUPPORTED, it is a binary attribute * carrying a &struct nl80211_wowlan_pattern_support. diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 93bc63eae076..cc0fad30b8c9 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -1238,6 +1238,8 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 portid, u32 seq, int flag dev->wiphy.wowlan.pattern_min_len, .max_pattern_len = dev->wiphy.wowlan.pattern_max_len, + .max_pkt_offset = + dev->wiphy.wowlan.max_pkt_offset, }; if (nla_put(msg, NL80211_WOWLAN_TRIG_PKT_PATTERN, sizeof(pat), &pat)) @@ -6895,6 +6897,39 @@ static int nl80211_leave_mesh(struct sk_buff *skb, struct genl_info *info) } #ifdef CONFIG_PM +static int nl80211_send_wowlan_patterns(struct sk_buff *msg, + struct cfg80211_registered_device *rdev) +{ + struct nlattr *nl_pats, *nl_pat; + int i, pat_len; + + if (!rdev->wowlan->n_patterns) + return 0; + + nl_pats = nla_nest_start(msg, NL80211_WOWLAN_TRIG_PKT_PATTERN); + if (!nl_pats) + return -ENOBUFS; + + for (i = 0; i < rdev->wowlan->n_patterns; i++) { + nl_pat = nla_nest_start(msg, i + 1); + if (!nl_pat) + return -ENOBUFS; + pat_len = rdev->wowlan->patterns[i].pattern_len; + if (nla_put(msg, NL80211_WOWLAN_PKTPAT_MASK, + DIV_ROUND_UP(pat_len, 8), + rdev->wowlan->patterns[i].mask) || + nla_put(msg, NL80211_WOWLAN_PKTPAT_PATTERN, + pat_len, rdev->wowlan->patterns[i].pattern) || + nla_put_u32(msg, NL80211_WOWLAN_PKTPAT_OFFSET, + rdev->wowlan->patterns[i].pkt_offset)) + return -ENOBUFS; + nla_nest_end(msg, nl_pat); + } + nla_nest_end(msg, nl_pats); + + return 0; +} + static int nl80211_get_wowlan(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; @@ -6935,32 +6970,8 @@ static int nl80211_get_wowlan(struct sk_buff *skb, struct genl_info *info) (rdev->wowlan->rfkill_release && nla_put_flag(msg, NL80211_WOWLAN_TRIG_RFKILL_RELEASE))) goto nla_put_failure; - if (rdev->wowlan->n_patterns) { - struct nlattr *nl_pats, *nl_pat; - int i, pat_len; - - nl_pats = nla_nest_start(msg, - NL80211_WOWLAN_TRIG_PKT_PATTERN); - if (!nl_pats) - goto nla_put_failure; - - for (i = 0; i < rdev->wowlan->n_patterns; i++) { - nl_pat = nla_nest_start(msg, i + 1); - if (!nl_pat) - goto nla_put_failure; - pat_len = rdev->wowlan->patterns[i].pattern_len; - if (nla_put(msg, NL80211_WOWLAN_PKTPAT_MASK, - DIV_ROUND_UP(pat_len, 8), - rdev->wowlan->patterns[i].mask) || - nla_put(msg, NL80211_WOWLAN_PKTPAT_PATTERN, - pat_len, - rdev->wowlan->patterns[i].pattern)) - goto nla_put_failure; - nla_nest_end(msg, nl_pat); - } - nla_nest_end(msg, nl_pats); - } - + if (nl80211_send_wowlan_patterns(msg, rdev)) + goto nla_put_failure; nla_nest_end(msg, nl_wowlan); } @@ -7046,7 +7057,7 @@ static int nl80211_set_wowlan(struct sk_buff *skb, struct genl_info *info) if (tb[NL80211_WOWLAN_TRIG_PKT_PATTERN]) { struct nlattr *pat; int n_patterns = 0; - int rem, pat_len, mask_len; + int rem, pat_len, mask_len, pkt_offset; struct nlattr *pat_tb[NUM_NL80211_WOWLAN_PKTPAT]; nla_for_each_nested(pat, tb[NL80211_WOWLAN_TRIG_PKT_PATTERN], @@ -7081,6 +7092,15 @@ static int nl80211_set_wowlan(struct sk_buff *skb, struct genl_info *info) pat_len < wowlan->pattern_min_len) goto error; + if (!pat_tb[NL80211_WOWLAN_PKTPAT_OFFSET]) + pkt_offset = 0; + else + pkt_offset = nla_get_u32( + pat_tb[NL80211_WOWLAN_PKTPAT_OFFSET]); + if (pkt_offset > wowlan->max_pkt_offset) + goto error; + new_triggers.patterns[i].pkt_offset = pkt_offset; + new_triggers.patterns[i].mask = kmalloc(mask_len + pat_len, GFP_KERNEL); if (!new_triggers.patterns[i].mask) { -- cgit v1.2.3 From 2a0e047ed62f20664005881b8e7f9328f910316a Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 23 Jan 2013 22:57:40 +0100 Subject: cfg80211: configuration for WoWLAN over TCP Intel Wireless devices are able to make a TCP connection after suspending, sending some data and waking up when the connection receives wakeup data (or breaks). Add the WoWLAN configuration and feature advertising API for it. Acked-by: David S. Miller Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 55 +++++++- include/uapi/linux/nl80211.h | 125 ++++++++++++++++++ net/wireless/core.h | 3 + net/wireless/nl80211.c | 295 ++++++++++++++++++++++++++++++++++++++++++- 4 files changed, 474 insertions(+), 4 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index d3a73818e44c..7e6569e1f16f 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -19,6 +19,7 @@ #include #include #include +#include #include /** @@ -1587,6 +1588,41 @@ struct cfg80211_wowlan_trig_pkt_pattern { int pkt_offset; }; +/** + * struct cfg80211_wowlan_tcp - TCP connection parameters + * + * @sock: (internal) socket for source port allocation + * @src: source IP address + * @dst: destination IP address + * @dst_mac: destination MAC address + * @src_port: source port + * @dst_port: destination port + * @payload_len: data payload length + * @payload: data payload buffer + * @payload_seq: payload sequence stamping configuration + * @data_interval: interval at which to send data packets + * @wake_len: wakeup payload match length + * @wake_data: wakeup payload match data + * @wake_mask: wakeup payload match mask + * @tokens_size: length of the tokens buffer + * @payload_tok: payload token usage configuration + */ +struct cfg80211_wowlan_tcp { + struct socket *sock; + __be32 src, dst; + u16 src_port, dst_port; + u8 dst_mac[ETH_ALEN]; + int payload_len; + const u8 *payload; + struct nl80211_wowlan_tcp_data_seq payload_seq; + u32 data_interval; + u32 wake_len; + const u8 *wake_data, *wake_mask; + u32 tokens_size; + /* must be last, variable member */ + struct nl80211_wowlan_tcp_data_token payload_tok; +}; + /** * struct cfg80211_wowlan - Wake on Wireless-LAN support info * @@ -1601,12 +1637,15 @@ struct cfg80211_wowlan_trig_pkt_pattern { * @eap_identity_req: wake up on EAP identity request packet * @four_way_handshake: wake up on 4-way handshake * @rfkill_release: wake up when rfkill is released + * @tcp: TCP connection establishment/wakeup parameters, see nl80211.h. + * NULL if not configured. */ struct cfg80211_wowlan { bool any, disconnect, magic_pkt, gtk_rekey_failure, eap_identity_req, four_way_handshake, rfkill_release; struct cfg80211_wowlan_trig_pkt_pattern *patterns; + struct cfg80211_wowlan_tcp *tcp; int n_patterns; }; @@ -1626,11 +1665,15 @@ struct cfg80211_wowlan { * frame triggers an 802.3 frame should be reported, for * disconnect due to deauth 802.11 frame. This indicates which * it is. + * @tcp_match: TCP wakeup packet received + * @tcp_connlost: TCP connection lost or failed to establish + * @tcp_nomoretokens: TCP data ran out of tokens */ struct cfg80211_wowlan_wakeup { bool disconnect, magic_pkt, gtk_rekey_failure, eap_identity_req, four_way_handshake, - rfkill_release, packet_80211; + rfkill_release, packet_80211, + tcp_match, tcp_connlost, tcp_nomoretokens; s32 pattern_idx; u32 packet_present_len, packet_len; const void *packet; @@ -2285,6 +2328,14 @@ enum wiphy_wowlan_support_flags { WIPHY_WOWLAN_RFKILL_RELEASE = BIT(7), }; +struct wiphy_wowlan_tcp_support { + const struct nl80211_wowlan_tcp_data_token_feature *tok; + u32 data_payload_max; + u32 data_interval_max; + u32 wake_payload_max; + bool seq; +}; + /** * struct wiphy_wowlan_support - WoWLAN support data * @flags: see &enum wiphy_wowlan_support_flags @@ -2293,6 +2344,7 @@ enum wiphy_wowlan_support_flags { * @pattern_max_len: maximum length of each pattern * @pattern_min_len: minimum length of each pattern * @max_pkt_offset: maximum Rx packet offset + * @tcp: TCP wakeup support information */ struct wiphy_wowlan_support { u32 flags; @@ -2300,6 +2352,7 @@ struct wiphy_wowlan_support { int pattern_max_len; int pattern_min_len; int max_pkt_offset; + const struct wiphy_wowlan_tcp_support *tcp; }; /** diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index eb7b32247ec5..5309b34930ea 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -2991,6 +2991,17 @@ struct nl80211_wowlan_pattern_support { * @NL80211_WOWLAN_TRIG_WAKEUP_PKT_8023_LEN: Original length of the 802.3 * packet, may be bigger than the @NL80211_WOWLAN_TRIG_WAKEUP_PKT_8023 * attribute if the packet was truncated somewhere. + * @NL80211_WOWLAN_TRIG_TCP_CONNECTION: TCP connection wake, see DOC section + * "TCP connection wakeup" for more details. This is a nested attribute + * containing the exact information for establishing and keeping alive + * the TCP connection. + * @NL80211_WOWLAN_TRIG_TCP_WAKEUP_MATCH: For wakeup reporting only, the + * wakeup packet was received on the TCP connection + * @NL80211_WOWLAN_TRIG_WAKEUP_TCP_CONNLOST: For wakeup reporting only, the + * TCP connection was lost or failed to be established + * @NL80211_WOWLAN_TRIG_WAKEUP_TCP_NOMORETOKENS: For wakeup reporting only, + * the TCP connection ran out of tokens to use for data to send to the + * service * @NUM_NL80211_WOWLAN_TRIG: number of wake on wireless triggers * @MAX_NL80211_WOWLAN_TRIG: highest wowlan trigger attribute number * @@ -3012,12 +3023,126 @@ enum nl80211_wowlan_triggers { NL80211_WOWLAN_TRIG_WAKEUP_PKT_80211_LEN, NL80211_WOWLAN_TRIG_WAKEUP_PKT_8023, NL80211_WOWLAN_TRIG_WAKEUP_PKT_8023_LEN, + NL80211_WOWLAN_TRIG_TCP_CONNECTION, + NL80211_WOWLAN_TRIG_WAKEUP_TCP_MATCH, + NL80211_WOWLAN_TRIG_WAKEUP_TCP_CONNLOST, + NL80211_WOWLAN_TRIG_WAKEUP_TCP_NOMORETOKENS, /* keep last */ NUM_NL80211_WOWLAN_TRIG, MAX_NL80211_WOWLAN_TRIG = NUM_NL80211_WOWLAN_TRIG - 1 }; +/** + * DOC: TCP connection wakeup + * + * Some devices can establish a TCP connection in order to be woken up by a + * packet coming in from outside their network segment, or behind NAT. If + * configured, the device will establish a TCP connection to the given + * service, and periodically send data to that service. The first data + * packet is usually transmitted after SYN/ACK, also ACKing the SYN/ACK. + * The data packets can optionally include a (little endian) sequence + * number (in the TCP payload!) that is generated by the device, and, also + * optionally, a token from a list of tokens. This serves as a keep-alive + * with the service, and for NATed connections, etc. + * + * During this keep-alive period, the server doesn't send any data to the + * client. When receiving data, it is compared against the wakeup pattern + * (and mask) and if it matches, the host is woken up. Similarly, if the + * connection breaks or cannot be established to start with, the host is + * also woken up. + * + * Developer's note: ARP offload is required for this, otherwise TCP + * response packets might not go through correctly. + */ + +/** + * struct nl80211_wowlan_tcp_data_seq - WoWLAN TCP data sequence + * @start: starting value + * @offset: offset of sequence number in packet + * @len: length of the sequence value to write, 1 through 4 + * + * Note: don't confuse with the TCP sequence number(s), this is for the + * keepalive packet payload. The actual value is written into the packet + * in little endian. + */ +struct nl80211_wowlan_tcp_data_seq { + __u32 start, offset, len; +}; + +/** + * struct nl80211_wowlan_tcp_data_token - WoWLAN TCP data token config + * @offset: offset of token in packet + * @len: length of each token + * @token_stream: stream of data to be used for the tokens, the length must + * be a multiple of @len for this to make sense + */ +struct nl80211_wowlan_tcp_data_token { + __u32 offset, len; + __u8 token_stream[]; +}; + +/** + * struct nl80211_wowlan_tcp_data_token_feature - data token features + * @min_len: minimum token length + * @max_len: maximum token length + * @bufsize: total available token buffer size (max size of @token_stream) + */ +struct nl80211_wowlan_tcp_data_token_feature { + __u32 min_len, max_len, bufsize; +}; + +/** + * enum nl80211_wowlan_tcp_attrs - WoWLAN TCP connection parameters + * @__NL80211_WOWLAN_TCP_INVALID: invalid number for nested attributes + * @NL80211_WOWLAN_TCP_SRC_IPV4: source IPv4 address (in network byte order) + * @NL80211_WOWLAN_TCP_DST_IPV4: destination IPv4 address + * (in network byte order) + * @NL80211_WOWLAN_TCP_DST_MAC: destination MAC address, this is given because + * route lookup when configured might be invalid by the time we suspend, + * and doing a route lookup when suspending is no longer possible as it + * might require ARP querying. + * @NL80211_WOWLAN_TCP_SRC_PORT: source port (u16); optional, if not given a + * socket and port will be allocated + * @NL80211_WOWLAN_TCP_DST_PORT: destination port (u16) + * @NL80211_WOWLAN_TCP_DATA_PAYLOAD: data packet payload, at least one byte. + * For feature advertising, a u32 attribute holding the maximum length + * of the data payload. + * @NL80211_WOWLAN_TCP_DATA_PAYLOAD_SEQ: data packet sequence configuration + * (if desired), a &struct nl80211_wowlan_tcp_data_seq. For feature + * advertising it is just a flag + * @NL80211_WOWLAN_TCP_DATA_PAYLOAD_TOKEN: data packet token configuration, + * see &struct nl80211_wowlan_tcp_data_token and for advertising see + * &struct nl80211_wowlan_tcp_data_token_feature. + * @NL80211_WOWLAN_TCP_DATA_INTERVAL: data interval in seconds, maximum + * interval in feature advertising (u32) + * @NL80211_WOWLAN_TCP_WAKE_PAYLOAD: wake packet payload, for advertising a + * u32 attribute holding the maximum length + * @NL80211_WOWLAN_TCP_WAKE_MASK: Wake packet payload mask, not used for + * feature advertising. The mask works like @NL80211_WOWLAN_PKTPAT_MASK + * but on the TCP payload only. + * @NUM_NL80211_WOWLAN_TCP: number of TCP attributes + * @MAX_NL80211_WOWLAN_TCP: highest attribute number + */ +enum nl80211_wowlan_tcp_attrs { + __NL80211_WOWLAN_TCP_INVALID, + NL80211_WOWLAN_TCP_SRC_IPV4, + NL80211_WOWLAN_TCP_DST_IPV4, + NL80211_WOWLAN_TCP_DST_MAC, + NL80211_WOWLAN_TCP_SRC_PORT, + NL80211_WOWLAN_TCP_DST_PORT, + NL80211_WOWLAN_TCP_DATA_PAYLOAD, + NL80211_WOWLAN_TCP_DATA_PAYLOAD_SEQ, + NL80211_WOWLAN_TCP_DATA_PAYLOAD_TOKEN, + NL80211_WOWLAN_TCP_DATA_INTERVAL, + NL80211_WOWLAN_TCP_WAKE_PAYLOAD, + NL80211_WOWLAN_TCP_WAKE_MASK, + + /* keep last */ + NUM_NL80211_WOWLAN_TCP, + MAX_NL80211_WOWLAN_TCP = NUM_NL80211_WOWLAN_TCP - 1 +}; + /** * enum nl80211_iface_limit_attrs - limit attributes * @NL80211_IFACE_LIMIT_UNSPEC: (reserved) diff --git a/net/wireless/core.h b/net/wireless/core.h index 37d70dc2fe82..949c9573d8d7 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -108,6 +108,9 @@ cfg80211_rdev_free_wowlan(struct cfg80211_registered_device *rdev) for (i = 0; i < rdev->wowlan->n_patterns; i++) kfree(rdev->wowlan->patterns[i].mask); kfree(rdev->wowlan->patterns); + if (rdev->wowlan->tcp && rdev->wowlan->tcp->sock) + sock_release(rdev->wowlan->tcp->sock); + kfree(rdev->wowlan->tcp); kfree(rdev->wowlan); } diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index cc0fad30b8c9..d29a461b4981 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -19,6 +19,7 @@ #include #include #include +#include #include "core.h" #include "nl80211.h" #include "reg.h" @@ -399,6 +400,26 @@ nl80211_wowlan_policy[NUM_NL80211_WOWLAN_TRIG] = { [NL80211_WOWLAN_TRIG_EAP_IDENT_REQUEST] = { .type = NLA_FLAG }, [NL80211_WOWLAN_TRIG_4WAY_HANDSHAKE] = { .type = NLA_FLAG }, [NL80211_WOWLAN_TRIG_RFKILL_RELEASE] = { .type = NLA_FLAG }, + [NL80211_WOWLAN_TRIG_TCP_CONNECTION] = { .type = NLA_NESTED }, +}; + +static const struct nla_policy +nl80211_wowlan_tcp_policy[NUM_NL80211_WOWLAN_TCP] = { + [NL80211_WOWLAN_TCP_SRC_IPV4] = { .type = NLA_U32 }, + [NL80211_WOWLAN_TCP_DST_IPV4] = { .type = NLA_U32 }, + [NL80211_WOWLAN_TCP_DST_MAC] = { .len = ETH_ALEN }, + [NL80211_WOWLAN_TCP_SRC_PORT] = { .type = NLA_U16 }, + [NL80211_WOWLAN_TCP_DST_PORT] = { .type = NLA_U16 }, + [NL80211_WOWLAN_TCP_DATA_PAYLOAD] = { .len = 1 }, + [NL80211_WOWLAN_TCP_DATA_PAYLOAD_SEQ] = { + .len = sizeof(struct nl80211_wowlan_tcp_data_seq) + }, + [NL80211_WOWLAN_TCP_DATA_PAYLOAD_TOKEN] = { + .len = sizeof(struct nl80211_wowlan_tcp_data_token) + }, + [NL80211_WOWLAN_TCP_DATA_INTERVAL] = { .type = NLA_U32 }, + [NL80211_WOWLAN_TCP_WAKE_PAYLOAD] = { .len = 1 }, + [NL80211_WOWLAN_TCP_WAKE_MASK] = { .len = 1 }, }; /* policy for GTK rekey offload attributes */ @@ -872,6 +893,48 @@ nla_put_failure: return -ENOBUFS; } +#ifdef CONFIG_PM +static int nl80211_send_wowlan_tcp_caps(struct cfg80211_registered_device *rdev, + struct sk_buff *msg) +{ + const struct wiphy_wowlan_tcp_support *tcp = rdev->wiphy.wowlan.tcp; + struct nlattr *nl_tcp; + + if (!tcp) + return 0; + + nl_tcp = nla_nest_start(msg, NL80211_WOWLAN_TRIG_TCP_CONNECTION); + if (!nl_tcp) + return -ENOBUFS; + + if (nla_put_u32(msg, NL80211_WOWLAN_TCP_DATA_PAYLOAD, + tcp->data_payload_max)) + return -ENOBUFS; + + if (nla_put_u32(msg, NL80211_WOWLAN_TCP_DATA_PAYLOAD, + tcp->data_payload_max)) + return -ENOBUFS; + + if (tcp->seq && nla_put_flag(msg, NL80211_WOWLAN_TCP_DATA_PAYLOAD_SEQ)) + return -ENOBUFS; + + if (tcp->tok && nla_put(msg, NL80211_WOWLAN_TCP_DATA_PAYLOAD_TOKEN, + sizeof(*tcp->tok), tcp->tok)) + return -ENOBUFS; + + if (nla_put_u32(msg, NL80211_WOWLAN_TCP_DATA_INTERVAL, + tcp->data_interval_max)) + return -ENOBUFS; + + if (nla_put_u32(msg, NL80211_WOWLAN_TCP_WAKE_PAYLOAD, + tcp->wake_payload_max)) + return -ENOBUFS; + + nla_nest_end(msg, nl_tcp); + return 0; +} +#endif + static int nl80211_send_wiphy(struct sk_buff *msg, u32 portid, u32 seq, int flags, struct cfg80211_registered_device *dev) { @@ -1246,6 +1309,9 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 portid, u32 seq, int flag goto nla_put_failure; } + if (nl80211_send_wowlan_tcp_caps(dev, msg)) + goto nla_put_failure; + nla_nest_end(msg, nl_wowlan); } #endif @@ -6930,16 +6996,67 @@ static int nl80211_send_wowlan_patterns(struct sk_buff *msg, return 0; } +static int nl80211_send_wowlan_tcp(struct sk_buff *msg, + struct cfg80211_wowlan_tcp *tcp) +{ + struct nlattr *nl_tcp; + + if (!tcp) + return 0; + + nl_tcp = nla_nest_start(msg, NL80211_WOWLAN_TRIG_TCP_CONNECTION); + if (!nl_tcp) + return -ENOBUFS; + + if (nla_put_be32(msg, NL80211_WOWLAN_TCP_SRC_IPV4, tcp->src) || + nla_put_be32(msg, NL80211_WOWLAN_TCP_DST_IPV4, tcp->dst) || + nla_put(msg, NL80211_WOWLAN_TCP_DST_MAC, ETH_ALEN, tcp->dst_mac) || + nla_put_u16(msg, NL80211_WOWLAN_TCP_SRC_PORT, tcp->src_port) || + nla_put_u16(msg, NL80211_WOWLAN_TCP_DST_PORT, tcp->dst_port) || + nla_put(msg, NL80211_WOWLAN_TCP_DATA_PAYLOAD, + tcp->payload_len, tcp->payload) || + nla_put_u32(msg, NL80211_WOWLAN_TCP_DATA_INTERVAL, + tcp->data_interval) || + nla_put(msg, NL80211_WOWLAN_TCP_WAKE_PAYLOAD, + tcp->wake_len, tcp->wake_data) || + nla_put(msg, NL80211_WOWLAN_TCP_WAKE_MASK, + DIV_ROUND_UP(tcp->wake_len, 8), tcp->wake_mask)) + return -ENOBUFS; + + if (tcp->payload_seq.len && + nla_put(msg, NL80211_WOWLAN_TCP_DATA_PAYLOAD_SEQ, + sizeof(tcp->payload_seq), &tcp->payload_seq)) + return -ENOBUFS; + + if (tcp->payload_tok.len && + nla_put(msg, NL80211_WOWLAN_TCP_DATA_PAYLOAD_TOKEN, + sizeof(tcp->payload_tok) + tcp->tokens_size, + &tcp->payload_tok)) + return -ENOBUFS; + + return 0; +} + static int nl80211_get_wowlan(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct sk_buff *msg; void *hdr; + u32 size = NLMSG_DEFAULT_SIZE; - if (!rdev->wiphy.wowlan.flags && !rdev->wiphy.wowlan.n_patterns) + if (!rdev->wiphy.wowlan.flags && !rdev->wiphy.wowlan.n_patterns && + !rdev->wiphy.wowlan.tcp) return -EOPNOTSUPP; - msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (rdev->wowlan && rdev->wowlan->tcp) { + /* adjust size to have room for all the data */ + size += rdev->wowlan->tcp->tokens_size + + rdev->wowlan->tcp->payload_len + + rdev->wowlan->tcp->wake_len + + rdev->wowlan->tcp->wake_len / 8; + } + + msg = nlmsg_new(size, GFP_KERNEL); if (!msg) return -ENOMEM; @@ -6970,8 +7087,13 @@ static int nl80211_get_wowlan(struct sk_buff *skb, struct genl_info *info) (rdev->wowlan->rfkill_release && nla_put_flag(msg, NL80211_WOWLAN_TRIG_RFKILL_RELEASE))) goto nla_put_failure; + if (nl80211_send_wowlan_patterns(msg, rdev)) goto nla_put_failure; + + if (nl80211_send_wowlan_tcp(msg, rdev->wowlan->tcp)) + goto nla_put_failure; + nla_nest_end(msg, nl_wowlan); } @@ -6983,6 +7105,150 @@ nla_put_failure: return -ENOBUFS; } +static int nl80211_parse_wowlan_tcp(struct cfg80211_registered_device *rdev, + struct nlattr *attr, + struct cfg80211_wowlan *trig) +{ + struct nlattr *tb[NUM_NL80211_WOWLAN_TCP]; + struct cfg80211_wowlan_tcp *cfg; + struct nl80211_wowlan_tcp_data_token *tok = NULL; + struct nl80211_wowlan_tcp_data_seq *seq = NULL; + u32 size; + u32 data_size, wake_size, tokens_size = 0, wake_mask_size; + int err, port; + + if (!rdev->wiphy.wowlan.tcp) + return -EINVAL; + + err = nla_parse(tb, MAX_NL80211_WOWLAN_TCP, + nla_data(attr), nla_len(attr), + nl80211_wowlan_tcp_policy); + if (err) + return err; + + if (!tb[NL80211_WOWLAN_TCP_SRC_IPV4] || + !tb[NL80211_WOWLAN_TCP_DST_IPV4] || + !tb[NL80211_WOWLAN_TCP_DST_MAC] || + !tb[NL80211_WOWLAN_TCP_DST_PORT] || + !tb[NL80211_WOWLAN_TCP_DATA_PAYLOAD] || + !tb[NL80211_WOWLAN_TCP_DATA_INTERVAL] || + !tb[NL80211_WOWLAN_TCP_WAKE_PAYLOAD] || + !tb[NL80211_WOWLAN_TCP_WAKE_MASK]) + return -EINVAL; + + data_size = nla_len(tb[NL80211_WOWLAN_TCP_DATA_PAYLOAD]); + if (data_size > rdev->wiphy.wowlan.tcp->data_payload_max) + return -EINVAL; + + if (nla_get_u32(tb[NL80211_WOWLAN_TCP_DATA_INTERVAL]) > + rdev->wiphy.wowlan.tcp->data_interval_max) + return -EINVAL; + + wake_size = nla_len(tb[NL80211_WOWLAN_TCP_WAKE_PAYLOAD]); + if (wake_size > rdev->wiphy.wowlan.tcp->wake_payload_max) + return -EINVAL; + + wake_mask_size = nla_len(tb[NL80211_WOWLAN_TCP_WAKE_MASK]); + if (wake_mask_size != DIV_ROUND_UP(wake_size, 8)) + return -EINVAL; + + if (tb[NL80211_WOWLAN_TCP_DATA_PAYLOAD_TOKEN]) { + u32 tokln = nla_len(tb[NL80211_WOWLAN_TCP_DATA_PAYLOAD_TOKEN]); + + tok = nla_data(tb[NL80211_WOWLAN_TCP_DATA_PAYLOAD_TOKEN]); + tokens_size = tokln - sizeof(*tok); + + if (!tok->len || tokens_size % tok->len) + return -EINVAL; + if (!rdev->wiphy.wowlan.tcp->tok) + return -EINVAL; + if (tok->len > rdev->wiphy.wowlan.tcp->tok->max_len) + return -EINVAL; + if (tok->len < rdev->wiphy.wowlan.tcp->tok->min_len) + return -EINVAL; + if (tokens_size > rdev->wiphy.wowlan.tcp->tok->bufsize) + return -EINVAL; + if (tok->offset + tok->len > data_size) + return -EINVAL; + } + + if (tb[NL80211_WOWLAN_TCP_DATA_PAYLOAD_SEQ]) { + seq = nla_data(tb[NL80211_WOWLAN_TCP_DATA_PAYLOAD_SEQ]); + if (!rdev->wiphy.wowlan.tcp->seq) + return -EINVAL; + if (seq->len == 0 || seq->len > 4) + return -EINVAL; + if (seq->len + seq->offset > data_size) + return -EINVAL; + } + + size = sizeof(*cfg); + size += data_size; + size += wake_size + wake_mask_size; + size += tokens_size; + + cfg = kzalloc(size, GFP_KERNEL); + if (!cfg) + return -ENOMEM; + cfg->src = nla_get_be32(tb[NL80211_WOWLAN_TCP_SRC_IPV4]); + cfg->dst = nla_get_be32(tb[NL80211_WOWLAN_TCP_DST_IPV4]); + memcpy(cfg->dst_mac, nla_data(tb[NL80211_WOWLAN_TCP_DST_MAC]), + ETH_ALEN); + if (tb[NL80211_WOWLAN_TCP_SRC_PORT]) + port = nla_get_u16(tb[NL80211_WOWLAN_TCP_SRC_PORT]); + else + port = 0; +#ifdef CONFIG_INET + /* allocate a socket and port for it and use it */ + err = __sock_create(wiphy_net(&rdev->wiphy), PF_INET, SOCK_STREAM, + IPPROTO_TCP, &cfg->sock, 1); + if (err) { + kfree(cfg); + return err; + } + if (inet_csk_get_port(cfg->sock->sk, port)) { + sock_release(cfg->sock); + kfree(cfg); + return -EADDRINUSE; + } + cfg->src_port = inet_sk(cfg->sock->sk)->inet_num; +#else + if (!port) { + kfree(cfg); + return -EINVAL; + } + cfg->src_port = port; +#endif + + cfg->dst_port = nla_get_u16(tb[NL80211_WOWLAN_TCP_DST_PORT]); + cfg->payload_len = data_size; + cfg->payload = (u8 *)cfg + sizeof(*cfg) + tokens_size; + memcpy((void *)cfg->payload, + nla_data(tb[NL80211_WOWLAN_TCP_DATA_PAYLOAD]), + data_size); + if (seq) + cfg->payload_seq = *seq; + cfg->data_interval = nla_get_u32(tb[NL80211_WOWLAN_TCP_DATA_INTERVAL]); + cfg->wake_len = wake_size; + cfg->wake_data = (u8 *)cfg + sizeof(*cfg) + tokens_size + data_size; + memcpy((void *)cfg->wake_data, + nla_data(tb[NL80211_WOWLAN_TCP_WAKE_PAYLOAD]), + wake_size); + cfg->wake_mask = (u8 *)cfg + sizeof(*cfg) + tokens_size + + data_size + wake_size; + memcpy((void *)cfg->wake_mask, + nla_data(tb[NL80211_WOWLAN_TCP_WAKE_MASK]), + wake_mask_size); + if (tok) { + cfg->tokens_size = tokens_size; + memcpy(&cfg->payload_tok, tok, sizeof(*tok) + tokens_size); + } + + trig->tcp = cfg; + + return 0; +} + static int nl80211_set_wowlan(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; @@ -6993,7 +7259,8 @@ static int nl80211_set_wowlan(struct sk_buff *skb, struct genl_info *info) int err, i; bool prev_enabled = rdev->wowlan; - if (!rdev->wiphy.wowlan.flags && !rdev->wiphy.wowlan.n_patterns) + if (!rdev->wiphy.wowlan.flags && !rdev->wiphy.wowlan.n_patterns && + !rdev->wiphy.wowlan.tcp) return -EOPNOTSUPP; if (!info->attrs[NL80211_ATTR_WOWLAN_TRIGGERS]) { @@ -7120,6 +7387,14 @@ static int nl80211_set_wowlan(struct sk_buff *skb, struct genl_info *info) } } + if (tb[NL80211_WOWLAN_TRIG_TCP_CONNECTION]) { + err = nl80211_parse_wowlan_tcp( + rdev, tb[NL80211_WOWLAN_TRIG_TCP_CONNECTION], + &new_triggers); + if (err) + goto error; + } + ntrig = kmemdup(&new_triggers, sizeof(new_triggers), GFP_KERNEL); if (!ntrig) { err = -ENOMEM; @@ -7137,6 +7412,9 @@ static int nl80211_set_wowlan(struct sk_buff *skb, struct genl_info *info) for (i = 0; i < new_triggers.n_patterns; i++) kfree(new_triggers.patterns[i].mask); kfree(new_triggers.patterns); + if (new_triggers.tcp && new_triggers.tcp->sock) + sock_release(new_triggers.tcp->sock); + kfree(new_triggers.tcp); return err; } #endif @@ -9418,6 +9696,17 @@ void cfg80211_report_wowlan_wakeup(struct wireless_dev *wdev, wakeup->pattern_idx)) goto free_msg; + if (wakeup->tcp_match) + nla_put_flag(msg, NL80211_WOWLAN_TRIG_WAKEUP_TCP_MATCH); + + if (wakeup->tcp_connlost) + nla_put_flag(msg, + NL80211_WOWLAN_TRIG_WAKEUP_TCP_CONNLOST); + + if (wakeup->tcp_nomoretokens) + nla_put_flag(msg, + NL80211_WOWLAN_TRIG_WAKEUP_TCP_NOMORETOKENS); + if (wakeup->packet) { u32 pkt_attr = NL80211_WOWLAN_TRIG_WAKEUP_PKT_80211; u32 len_attr = NL80211_WOWLAN_TRIG_WAKEUP_PKT_80211_LEN; -- cgit v1.2.3 From 93be6ce0e91b6a94783e012b1857a347a5e6e9f2 Mon Sep 17 00:00:00 2001 From: Andrey Vagin Date: Mon, 11 Feb 2013 05:50:18 +0000 Subject: tcp: set and get per-socket timestamp A timestamp can be set, only if a socket is in the repair mode. This patch adds a new socket option TCP_TIMESTAMP, which allows to get and set current tcp times stamp. Cc: "David S. Miller" Cc: Alexey Kuznetsov Cc: James Morris Cc: Hideaki YOSHIFUJI Cc: Patrick McHardy Cc: Eric Dumazet Cc: Pavel Emelyanov Signed-off-by: Andrey Vagin Signed-off-by: David S. Miller --- include/uapi/linux/tcp.h | 1 + net/ipv4/tcp.c | 9 +++++++++ 2 files changed, 10 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h index e962faa5ab0d..6b1ead0b0c9d 100644 --- a/include/uapi/linux/tcp.h +++ b/include/uapi/linux/tcp.h @@ -111,6 +111,7 @@ enum { #define TCP_QUEUE_SEQ 21 #define TCP_REPAIR_OPTIONS 22 #define TCP_FASTOPEN 23 /* Enable FastOpen on listeners */ +#define TCP_TIMESTAMP 24 struct tcp_repair_opt { __u32 opt_code; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 8a90bda96038..801b07b796f0 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2714,6 +2714,12 @@ static int do_tcp_setsockopt(struct sock *sk, int level, else err = -EINVAL; break; + case TCP_TIMESTAMP: + if (!tp->repair) + err = -EPERM; + else + tp->tsoffset = val - tcp_time_stamp; + break; default: err = -ENOPROTOOPT; break; @@ -2962,6 +2968,9 @@ static int do_tcp_getsockopt(struct sock *sk, int level, case TCP_USER_TIMEOUT: val = jiffies_to_msecs(icsk->icsk_user_timeout); break; + case TCP_TIMESTAMP: + val = tcp_time_stamp + tp->tsoffset; + break; default: return -ENOPROTOOPT; } -- cgit v1.2.3 From 407af3299ef1ac7e87ce3fb530e32a009d1a9efd Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Wed, 13 Feb 2013 12:00:12 +0000 Subject: bridge: Add netlink interface to configure vlans on bridge ports Add a netlink interface to add and remove vlan configuration on bridge port. The interface uses the RTM_SETLINK message and encodes the vlan configuration inside the IFLA_AF_SPEC. It is possble to include multiple vlans to either add or remove in a single message. Signed-off-by: Vlad Yasevich Signed-off-by: David S. Miller --- include/linux/netdevice.h | 2 + include/uapi/linux/if_bridge.h | 9 +++ net/bridge/br_device.c | 1 + net/bridge/br_if.c | 1 + net/bridge/br_netlink.c | 139 +++++++++++++++++++++++++++++++++++------ net/bridge/br_private.h | 1 + net/core/rtnetlink.c | 72 +++++++++++++++++++++ 7 files changed, 207 insertions(+), 18 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 25bd46f52877..1b90f9401000 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1020,6 +1020,8 @@ struct net_device_ops { int (*ndo_bridge_getlink)(struct sk_buff *skb, u32 pid, u32 seq, struct net_device *dev); + int (*ndo_bridge_dellink)(struct net_device *dev, + struct nlmsghdr *nlh); int (*ndo_change_carrier)(struct net_device *dev, bool new_carrier); }; diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h index 5db297514aec..3ca9817ca7e8 100644 --- a/include/uapi/linux/if_bridge.h +++ b/include/uapi/linux/if_bridge.h @@ -108,15 +108,24 @@ struct __fdb_entry { * [IFLA_AF_SPEC] = { * [IFLA_BRIDGE_FLAGS] * [IFLA_BRIDGE_MODE] + * [IFLA_BRIDGE_VLAN_INFO] * } */ enum { IFLA_BRIDGE_FLAGS, IFLA_BRIDGE_MODE, + IFLA_BRIDGE_VLAN_INFO, __IFLA_BRIDGE_MAX, }; #define IFLA_BRIDGE_MAX (__IFLA_BRIDGE_MAX - 1) +#define BRIDGE_VLAN_INFO_MASTER (1<<0) /* Operate on Bridge device as well */ + +struct bridge_vlan_info { + u16 flags; + u16 vid; +}; + /* Bridge multicast database attributes * [MDBA_MDB] = { * [MDBA_MDB_ENTRY] = { diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index 35a2c2c84f33..091bedf266a0 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -316,6 +316,7 @@ static const struct net_device_ops br_netdev_ops = { .ndo_fdb_dump = br_fdb_dump, .ndo_bridge_getlink = br_getlink, .ndo_bridge_setlink = br_setlink, + .ndo_bridge_dellink = br_dellink, }; static void br_dev_free(struct net_device *dev) diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index af9d65ab4001..335c60cebfd1 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -23,6 +23,7 @@ #include #include #include +#include #include "br_private.h" diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 39ca9796f3f7..534a9f4587a9 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -16,6 +16,7 @@ #include #include #include +#include #include "br_private.h" #include "br_private_stp.h" @@ -119,10 +120,14 @@ nla_put_failure: */ void br_ifinfo_notify(int event, struct net_bridge_port *port) { - struct net *net = dev_net(port->dev); + struct net *net; struct sk_buff *skb; int err = -ENOBUFS; + if (!port) + return; + + net = dev_net(port->dev); br_debug(port->br, "port %u(%s) event %d\n", (unsigned int)port->port_no, port->dev->name, event); @@ -144,6 +149,7 @@ errout: rtnl_set_sk_err(net, RTNLGRP_LINK, err); } + /* * Dump information about all ports, in response to GETLINK */ @@ -162,6 +168,64 @@ out: return err; } +const struct nla_policy ifla_br_policy[IFLA_MAX+1] = { + [IFLA_BRIDGE_FLAGS] = { .type = NLA_U16 }, + [IFLA_BRIDGE_MODE] = { .type = NLA_U16 }, + [IFLA_BRIDGE_VLAN_INFO] = { .type = NLA_BINARY, + .len = sizeof(struct bridge_vlan_info), }, +}; + +static int br_afspec(struct net_bridge *br, + struct net_bridge_port *p, + struct nlattr *af_spec, + int cmd) +{ + struct nlattr *tb[IFLA_BRIDGE_MAX+1]; + int err = 0; + + err = nla_parse_nested(tb, IFLA_BRIDGE_MAX, af_spec, ifla_br_policy); + if (err) + return err; + + if (tb[IFLA_BRIDGE_VLAN_INFO]) { + struct bridge_vlan_info *vinfo; + + vinfo = nla_data(tb[IFLA_BRIDGE_VLAN_INFO]); + + if (vinfo->vid >= VLAN_N_VID) + return -EINVAL; + + switch (cmd) { + case RTM_SETLINK: + if (p) { + err = nbp_vlan_add(p, vinfo->vid); + if (err) + break; + + if (vinfo->flags & BRIDGE_VLAN_INFO_MASTER) + err = br_vlan_add(p->br, vinfo->vid); + } else + err = br_vlan_add(br, vinfo->vid); + + if (err) + break; + + break; + + case RTM_DELLINK: + if (p) { + nbp_vlan_delete(p, vinfo->vid); + if (vinfo->flags & BRIDGE_VLAN_INFO_MASTER) + br_vlan_delete(p->br, vinfo->vid); + } else + br_vlan_delete(br, vinfo->vid); + break; + } + } + + return err; +} + static const struct nla_policy ifla_brport_policy[IFLA_BRPORT_MAX + 1] = { [IFLA_BRPORT_STATE] = { .type = NLA_U8 }, [IFLA_BRPORT_COST] = { .type = NLA_U32 }, @@ -241,6 +305,7 @@ int br_setlink(struct net_device *dev, struct nlmsghdr *nlh) { struct ifinfomsg *ifm; struct nlattr *protinfo; + struct nlattr *afspec; struct net_bridge_port *p; struct nlattr *tb[IFLA_BRPORT_MAX + 1]; int err; @@ -248,38 +313,76 @@ int br_setlink(struct net_device *dev, struct nlmsghdr *nlh) ifm = nlmsg_data(nlh); protinfo = nlmsg_find_attr(nlh, sizeof(*ifm), IFLA_PROTINFO); - if (!protinfo) + afspec = nlmsg_find_attr(nlh, sizeof(*ifm), IFLA_AF_SPEC); + if (!protinfo && !afspec) return 0; p = br_port_get_rtnl(dev); - if (!p) + /* We want to accept dev as bridge itself if the AF_SPEC + * is set to see if someone is setting vlan info on the brigde + */ + if (!p && ((dev->priv_flags & IFF_EBRIDGE) && !afspec)) return -EINVAL; - if (protinfo->nla_type & NLA_F_NESTED) { - err = nla_parse_nested(tb, IFLA_BRPORT_MAX, - protinfo, ifla_brport_policy); + if (p && protinfo) { + if (protinfo->nla_type & NLA_F_NESTED) { + err = nla_parse_nested(tb, IFLA_BRPORT_MAX, + protinfo, ifla_brport_policy); + if (err) + return err; + + spin_lock_bh(&p->br->lock); + err = br_setport(p, tb); + spin_unlock_bh(&p->br->lock); + } else { + /* Binary compatability with old RSTP */ + if (nla_len(protinfo) < sizeof(u8)) + return -EINVAL; + + spin_lock_bh(&p->br->lock); + err = br_set_port_state(p, nla_get_u8(protinfo)); + spin_unlock_bh(&p->br->lock); + } if (err) - return err; - - spin_lock_bh(&p->br->lock); - err = br_setport(p, tb); - spin_unlock_bh(&p->br->lock); - } else { - /* Binary compatability with old RSTP */ - if (nla_len(protinfo) < sizeof(u8)) - return -EINVAL; + goto out; + } - spin_lock_bh(&p->br->lock); - err = br_set_port_state(p, nla_get_u8(protinfo)); - spin_unlock_bh(&p->br->lock); + if (afspec) { + err = br_afspec((struct net_bridge *)netdev_priv(dev), p, + afspec, RTM_SETLINK); } if (err == 0) br_ifinfo_notify(RTM_NEWLINK, p); +out: return err; } +/* Delete port information */ +int br_dellink(struct net_device *dev, struct nlmsghdr *nlh) +{ + struct ifinfomsg *ifm; + struct nlattr *afspec; + struct net_bridge_port *p; + int err; + + ifm = nlmsg_data(nlh); + + afspec = nlmsg_find_attr(nlh, sizeof(*ifm), IFLA_AF_SPEC); + if (!afspec) + return 0; + + p = br_port_get_rtnl(dev); + /* We want to accept dev as bridge itself as well */ + if (!p && !(dev->priv_flags & IFF_EBRIDGE)) + return -EINVAL; + + err = br_afspec((struct net_bridge *)netdev_priv(dev), p, + afspec, RTM_DELLINK); + + return err; +} static int br_validate(struct nlattr *tb[], struct nlattr *data[]) { if (tb[IFLA_ADDRESS]) { diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index f0f24610d111..a42f9d49a64e 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -713,6 +713,7 @@ extern int br_netlink_init(void); extern void br_netlink_fini(void); extern void br_ifinfo_notify(int event, struct net_bridge_port *port); extern int br_setlink(struct net_device *dev, struct nlmsghdr *nlmsg); +extern int br_dellink(struct net_device *dev, struct nlmsghdr *nlmsg); extern int br_getlink(struct sk_buff *skb, u32 pid, u32 seq, struct net_device *dev); diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index c1e4db60eeca..2c9ccbfbd93c 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -2464,6 +2464,77 @@ out: return err; } +static int rtnl_bridge_dellink(struct sk_buff *skb, struct nlmsghdr *nlh, + void *arg) +{ + struct net *net = sock_net(skb->sk); + struct ifinfomsg *ifm; + struct net_device *dev; + struct nlattr *br_spec, *attr = NULL; + int rem, err = -EOPNOTSUPP; + u16 oflags, flags = 0; + bool have_flags = false; + + if (nlmsg_len(nlh) < sizeof(*ifm)) + return -EINVAL; + + ifm = nlmsg_data(nlh); + if (ifm->ifi_family != AF_BRIDGE) + return -EPFNOSUPPORT; + + dev = __dev_get_by_index(net, ifm->ifi_index); + if (!dev) { + pr_info("PF_BRIDGE: RTM_SETLINK with unknown ifindex\n"); + return -ENODEV; + } + + br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC); + if (br_spec) { + nla_for_each_nested(attr, br_spec, rem) { + if (nla_type(attr) == IFLA_BRIDGE_FLAGS) { + have_flags = true; + flags = nla_get_u16(attr); + break; + } + } + } + + oflags = flags; + + if (!flags || (flags & BRIDGE_FLAGS_MASTER)) { + struct net_device *br_dev = netdev_master_upper_dev_get(dev); + + if (!br_dev || !br_dev->netdev_ops->ndo_bridge_dellink) { + err = -EOPNOTSUPP; + goto out; + } + + err = br_dev->netdev_ops->ndo_bridge_dellink(dev, nlh); + if (err) + goto out; + + flags &= ~BRIDGE_FLAGS_MASTER; + } + + if ((flags & BRIDGE_FLAGS_SELF)) { + if (!dev->netdev_ops->ndo_bridge_dellink) + err = -EOPNOTSUPP; + else + err = dev->netdev_ops->ndo_bridge_dellink(dev, nlh); + + if (!err) + flags &= ~BRIDGE_FLAGS_SELF; + } + + if (have_flags) + memcpy(nla_data(attr), &flags, sizeof(flags)); + /* Generate event to notify upper layer of bridge change */ + if (!err) + err = rtnl_bridge_notify(dev, oflags); +out: + return err; +} + /* Protected by RTNL sempahore. */ static struct rtattr **rta_buf; static int rtattr_max; @@ -2647,6 +2718,7 @@ void __init rtnetlink_init(void) rtnl_register(PF_BRIDGE, RTM_GETNEIGH, NULL, rtnl_fdb_dump, NULL); rtnl_register(PF_BRIDGE, RTM_GETLINK, NULL, rtnl_bridge_getlink, NULL); + rtnl_register(PF_BRIDGE, RTM_DELLINK, rtnl_bridge_dellink, NULL, NULL); rtnl_register(PF_BRIDGE, RTM_SETLINK, rtnl_bridge_setlink, NULL, NULL); } -- cgit v1.2.3 From 6cbdceeb1cb12c7d620161925a8c3e81daadb2e4 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Wed, 13 Feb 2013 12:00:13 +0000 Subject: bridge: Dump vlan information from a bridge port Using the RTM_GETLINK dump the vlan filter list of a given bridge port. The information depends on setting the filter flag similar to how nic VF info is dumped. Signed-off-by: Vlad Yasevich Signed-off-by: David S. Miller --- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 3 +- include/linux/netdevice.h | 3 +- include/uapi/linux/rtnetlink.h | 1 + net/bridge/br_netlink.c | 94 +++++++++++++++++++++++---- net/bridge/br_private.h | 3 +- net/bridge/br_vlan.c | 2 + net/core/rtnetlink.c | 16 +++-- 7 files changed, 104 insertions(+), 18 deletions(-) (limited to 'include/uapi/linux') diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 6999269b3a4a..4e2aa47193cb 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -7079,7 +7079,8 @@ static int ixgbe_ndo_bridge_setlink(struct net_device *dev, } static int ixgbe_ndo_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, - struct net_device *dev) + struct net_device *dev, + u32 filter_mask) { struct ixgbe_adapter *adapter = netdev_priv(dev); u16 mode; diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 1b90f9401000..1964ca66df56 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1019,7 +1019,8 @@ struct net_device_ops { struct nlmsghdr *nlh); int (*ndo_bridge_getlink)(struct sk_buff *skb, u32 pid, u32 seq, - struct net_device *dev); + struct net_device *dev, + u32 filter_mask); int (*ndo_bridge_dellink)(struct net_device *dev, struct nlmsghdr *nlh); int (*ndo_change_carrier)(struct net_device *dev, diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h index 7a5eb196ade9..7a2144e1afae 100644 --- a/include/uapi/linux/rtnetlink.h +++ b/include/uapi/linux/rtnetlink.h @@ -630,6 +630,7 @@ struct tcamsg { /* New extended info filters for IFLA_EXT_MASK */ #define RTEXT_FILTER_VF (1 << 0) +#define RTEXT_FILTER_BRVLAN (1 << 1) /* End of information exported to user level */ diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 534a9f4587a9..fe1980d5a7e4 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -65,15 +65,21 @@ static int br_port_fill_attrs(struct sk_buff *skb, * Create one netlink message for one interface * Contains port and master info as well as carrier and bridge state. */ -static int br_fill_ifinfo(struct sk_buff *skb, const struct net_bridge_port *port, - u32 pid, u32 seq, int event, unsigned int flags) +static int br_fill_ifinfo(struct sk_buff *skb, + const struct net_bridge_port *port, + u32 pid, u32 seq, int event, unsigned int flags, + u32 filter_mask, const struct net_device *dev) { - const struct net_bridge *br = port->br; - const struct net_device *dev = port->dev; + const struct net_bridge *br; struct ifinfomsg *hdr; struct nlmsghdr *nlh; u8 operstate = netif_running(dev) ? dev->operstate : IF_OPER_DOWN; + if (port) + br = port->br; + else + br = netdev_priv(dev); + br_debug(br, "br_fill_info event %d port %s master %s\n", event, dev->name, br->dev->name); @@ -99,7 +105,7 @@ static int br_fill_ifinfo(struct sk_buff *skb, const struct net_bridge_port *por nla_put_u32(skb, IFLA_LINK, dev->iflink))) goto nla_put_failure; - if (event == RTM_NEWLINK) { + if (event == RTM_NEWLINK && port) { struct nlattr *nest = nla_nest_start(skb, IFLA_PROTINFO | NLA_F_NESTED); @@ -108,6 +114,40 @@ static int br_fill_ifinfo(struct sk_buff *skb, const struct net_bridge_port *por nla_nest_end(skb, nest); } + /* Check if the VID information is requested */ + if (filter_mask & RTEXT_FILTER_BRVLAN) { + struct nlattr *af; + const struct net_port_vlans *pv; + struct bridge_vlan_info vinfo; + u16 vid; + + if (port) + pv = nbp_get_vlan_info(port); + else + pv = br_get_vlan_info(br); + + if (!pv || bitmap_empty(pv->vlan_bitmap, BR_VLAN_BITMAP_LEN)) + goto done; + + af = nla_nest_start(skb, IFLA_AF_SPEC); + if (!af) + goto nla_put_failure; + + for (vid = find_first_bit(pv->vlan_bitmap, BR_VLAN_BITMAP_LEN); + vid < BR_VLAN_BITMAP_LEN; + vid = find_next_bit(pv->vlan_bitmap, + BR_VLAN_BITMAP_LEN, vid+1)) { + vinfo.vid = vid; + vinfo.flags = 0; + if (nla_put(skb, IFLA_BRIDGE_VLAN_INFO, + sizeof(vinfo), &vinfo)) + goto nla_put_failure; + } + + nla_nest_end(skb, af); + } + +done: return nlmsg_end(skb, nlh); nla_put_failure: @@ -135,7 +175,7 @@ void br_ifinfo_notify(int event, struct net_bridge_port *port) if (skb == NULL) goto errout; - err = br_fill_ifinfo(skb, port, 0, 0, event, 0); + err = br_fill_ifinfo(skb, port, 0, 0, event, 0, 0, port->dev); if (err < 0) { /* -EMSGSIZE implies BUG in br_nlmsg_size() */ WARN_ON(err == -EMSGSIZE); @@ -154,16 +194,17 @@ errout: * Dump information about all ports, in response to GETLINK */ int br_getlink(struct sk_buff *skb, u32 pid, u32 seq, - struct net_device *dev) + struct net_device *dev, u32 filter_mask) { int err = 0; struct net_bridge_port *port = br_port_get_rcu(dev); - /* not a bridge port */ - if (!port) + /* not a bridge port and */ + if (!port && !(filter_mask & RTEXT_FILTER_BRVLAN)) goto out; - err = br_fill_ifinfo(skb, port, pid, seq, RTM_NEWLINK, NLM_F_MULTI); + err = br_fill_ifinfo(skb, port, pid, seq, RTM_NEWLINK, NLM_F_MULTI, + filter_mask, dev); out: return err; } @@ -395,6 +436,29 @@ static int br_validate(struct nlattr *tb[], struct nlattr *data[]) return 0; } +static size_t br_get_link_af_size(const struct net_device *dev) +{ + struct net_port_vlans *pv; + + if (br_port_exists(dev)) + pv = nbp_get_vlan_info(br_port_get_rcu(dev)); + else if (dev->priv_flags & IFF_EBRIDGE) + pv = br_get_vlan_info((struct net_bridge *)netdev_priv(dev)); + else + return 0; + + if (!pv) + return 0; + + /* Each VLAN is returned in bridge_vlan_info along with flags */ + return pv->num_vlans * nla_total_size(sizeof(struct bridge_vlan_info)); +} + +struct rtnl_af_ops br_af_ops = { + .family = AF_BRIDGE, + .get_link_af_size = br_get_link_af_size, +}; + struct rtnl_link_ops br_link_ops __read_mostly = { .kind = "bridge", .priv_size = sizeof(struct net_bridge), @@ -408,11 +472,18 @@ int __init br_netlink_init(void) int err; br_mdb_init(); - err = rtnl_link_register(&br_link_ops); + err = rtnl_af_register(&br_af_ops); if (err) goto out; + err = rtnl_link_register(&br_link_ops); + if (err) + goto out_af; + return 0; + +out_af: + rtnl_af_unregister(&br_af_ops); out: br_mdb_uninit(); return err; @@ -421,5 +492,6 @@ out: void __exit br_netlink_fini(void) { br_mdb_uninit(); + rtnl_af_unregister(&br_af_ops); rtnl_link_unregister(&br_link_ops); } diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index a42f9d49a64e..ce2235255c2f 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -73,6 +73,7 @@ struct net_port_vlans { } parent; struct rcu_head rcu; unsigned long vlan_bitmap[BR_VLAN_BITMAP_LEN]; + u16 num_vlans; }; struct net_bridge_fdb_entry @@ -715,7 +716,7 @@ extern void br_ifinfo_notify(int event, struct net_bridge_port *port); extern int br_setlink(struct net_device *dev, struct nlmsghdr *nlmsg); extern int br_dellink(struct net_device *dev, struct nlmsghdr *nlmsg); extern int br_getlink(struct sk_buff *skb, u32 pid, u32 seq, - struct net_device *dev); + struct net_device *dev, u32 filter_mask); #ifdef CONFIG_SYSFS /* br_sysfs_if.c */ diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c index d8690bfe63d4..f2bf5a197ea3 100644 --- a/net/bridge/br_vlan.c +++ b/net/bridge/br_vlan.c @@ -28,6 +28,7 @@ static int __vlan_add(struct net_port_vlans *v, u16 vid) } set_bit(vid, v->vlan_bitmap); + v->num_vlans++; return 0; } @@ -44,6 +45,7 @@ static int __vlan_del(struct net_port_vlans *v, u16 vid) } clear_bit(vid, v->vlan_bitmap); + v->num_vlans--; if (bitmap_empty(v->vlan_bitmap, BR_VLAN_BITMAP_LEN)) { if (v->port_idx) rcu_assign_pointer(v->parent.port->vlan_info, NULL); diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 2c9ccbfbd93c..f3a112ec86d5 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -2315,6 +2315,13 @@ static int rtnl_bridge_getlink(struct sk_buff *skb, struct netlink_callback *cb) int idx = 0; u32 portid = NETLINK_CB(cb->skb).portid; u32 seq = cb->nlh->nlmsg_seq; + struct nlattr *extfilt; + u32 filter_mask = 0; + + extfilt = nlmsg_find_attr(cb->nlh, sizeof(struct rtgenmsg), + IFLA_EXT_MASK); + if (extfilt) + filter_mask = nla_get_u32(extfilt); rcu_read_lock(); for_each_netdev_rcu(net, dev) { @@ -2324,14 +2331,15 @@ static int rtnl_bridge_getlink(struct sk_buff *skb, struct netlink_callback *cb) if (br_dev && br_dev->netdev_ops->ndo_bridge_getlink) { if (idx >= cb->args[0] && br_dev->netdev_ops->ndo_bridge_getlink( - skb, portid, seq, dev) < 0) + skb, portid, seq, dev, filter_mask) < 0) break; idx++; } if (ops->ndo_bridge_getlink) { if (idx >= cb->args[0] && - ops->ndo_bridge_getlink(skb, portid, seq, dev) < 0) + ops->ndo_bridge_getlink(skb, portid, seq, dev, + filter_mask) < 0) break; idx++; } @@ -2372,14 +2380,14 @@ static int rtnl_bridge_notify(struct net_device *dev, u16 flags) if ((!flags || (flags & BRIDGE_FLAGS_MASTER)) && br_dev && br_dev->netdev_ops->ndo_bridge_getlink) { - err = br_dev->netdev_ops->ndo_bridge_getlink(skb, 0, 0, dev); + err = br_dev->netdev_ops->ndo_bridge_getlink(skb, 0, 0, dev, 0); if (err < 0) goto errout; } if ((flags & BRIDGE_FLAGS_SELF) && dev->netdev_ops->ndo_bridge_getlink) { - err = dev->netdev_ops->ndo_bridge_getlink(skb, 0, 0, dev); + err = dev->netdev_ops->ndo_bridge_getlink(skb, 0, 0, dev, 0); if (err < 0) goto errout; } -- cgit v1.2.3 From 552406c488ec2cf1aaf8b5bd24d1750c9fd6d8cc Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Wed, 13 Feb 2013 12:00:15 +0000 Subject: bridge: Add the ability to configure pvid A user may designate a certain vlan as PVID. This means that any ingress frame that does not contain a vlan tag is assigned to this vlan and any forwarding decisions are made with this vlan in mind. Signed-off-by: Vlad Yasevich Signed-off-by: David S. Miller --- include/uapi/linux/if_bridge.h | 1 + net/bridge/br_netlink.c | 11 +++++++--- net/bridge/br_private.h | 8 +++---- net/bridge/br_vlan.c | 47 +++++++++++++++++++++++++++++++++--------- 4 files changed, 50 insertions(+), 17 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h index 3ca9817ca7e8..c6c30e28f396 100644 --- a/include/uapi/linux/if_bridge.h +++ b/include/uapi/linux/if_bridge.h @@ -120,6 +120,7 @@ enum { #define IFLA_BRIDGE_MAX (__IFLA_BRIDGE_MAX - 1) #define BRIDGE_VLAN_INFO_MASTER (1<<0) /* Operate on Bridge device as well */ +#define BRIDGE_VLAN_INFO_PVID (1<<1) /* VLAN is PVID, ingress untagged */ struct bridge_vlan_info { u16 flags; diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index fe1980d5a7e4..e044cc0b5650 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -120,6 +120,7 @@ static int br_fill_ifinfo(struct sk_buff *skb, const struct net_port_vlans *pv; struct bridge_vlan_info vinfo; u16 vid; + u16 pvid; if (port) pv = nbp_get_vlan_info(port); @@ -133,12 +134,15 @@ static int br_fill_ifinfo(struct sk_buff *skb, if (!af) goto nla_put_failure; + pvid = br_get_pvid(pv); for (vid = find_first_bit(pv->vlan_bitmap, BR_VLAN_BITMAP_LEN); vid < BR_VLAN_BITMAP_LEN; vid = find_next_bit(pv->vlan_bitmap, BR_VLAN_BITMAP_LEN, vid+1)) { vinfo.vid = vid; vinfo.flags = 0; + if (vid == pvid) + vinfo.flags |= BRIDGE_VLAN_INFO_PVID; if (nla_put(skb, IFLA_BRIDGE_VLAN_INFO, sizeof(vinfo), &vinfo)) goto nla_put_failure; @@ -239,14 +243,15 @@ static int br_afspec(struct net_bridge *br, switch (cmd) { case RTM_SETLINK: if (p) { - err = nbp_vlan_add(p, vinfo->vid); + err = nbp_vlan_add(p, vinfo->vid, vinfo->flags); if (err) break; if (vinfo->flags & BRIDGE_VLAN_INFO_MASTER) - err = br_vlan_add(p->br, vinfo->vid); + err = br_vlan_add(p->br, vinfo->vid, + vinfo->flags); } else - err = br_vlan_add(br, vinfo->vid); + err = br_vlan_add(br, vinfo->vid, vinfo->flags); if (err) break; diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index ea8e7efd9137..1ae6395a0369 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -562,11 +562,11 @@ extern bool br_allowed_egress(struct net_bridge *br, extern struct sk_buff *br_handle_vlan(struct net_bridge *br, const struct net_port_vlans *v, struct sk_buff *skb); -extern int br_vlan_add(struct net_bridge *br, u16 vid); +extern int br_vlan_add(struct net_bridge *br, u16 vid, u16 flags); extern int br_vlan_delete(struct net_bridge *br, u16 vid); extern void br_vlan_flush(struct net_bridge *br); extern int br_vlan_filter_toggle(struct net_bridge *br, unsigned long val); -extern int nbp_vlan_add(struct net_bridge_port *port, u16 vid); +extern int nbp_vlan_add(struct net_bridge_port *port, u16 vid, u16 flags); extern int nbp_vlan_delete(struct net_bridge_port *port, u16 vid); extern void nbp_vlan_flush(struct net_bridge_port *port); @@ -633,7 +633,7 @@ static inline struct sk_buff *br_handle_vlan(struct net_bridge *br, return skb; } -static inline int br_vlan_add(struct net_bridge *br, u16 vid) +static inline int br_vlan_add(struct net_bridge *br, u16 vid, u16 flags) { return -EOPNOTSUPP; } @@ -647,7 +647,7 @@ static inline void br_vlan_flush(struct net_bridge *br) { } -static inline int nbp_vlan_add(struct net_bridge_port *port, u16 vid) +static inline int nbp_vlan_add(struct net_bridge_port *port, u16 vid, u16 flags) { return -EOPNOTSUPP; } diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c index 20057de56db0..c79940cff3a1 100644 --- a/net/bridge/br_vlan.c +++ b/net/bridge/br_vlan.c @@ -5,12 +5,33 @@ #include "br_private.h" -static int __vlan_add(struct net_port_vlans *v, u16 vid) +static void __vlan_add_pvid(struct net_port_vlans *v, u16 vid) +{ + if (v->pvid == vid) + return; + + smp_wmb(); + v->pvid = vid; +} + +static void __vlan_delete_pvid(struct net_port_vlans *v, u16 vid) +{ + if (v->pvid != vid) + return; + + smp_wmb(); + v->pvid = 0; +} + +static int __vlan_add(struct net_port_vlans *v, u16 vid, u16 flags) { int err; - if (test_bit(vid, v->vlan_bitmap)) - return -EEXIST; + if (test_bit(vid, v->vlan_bitmap)) { + if (flags & BRIDGE_VLAN_INFO_PVID) + __vlan_add_pvid(v, vid); + return 0; + } if (v->port_idx && vid) { struct net_device *dev = v->parent.port->dev; @@ -29,6 +50,9 @@ static int __vlan_add(struct net_port_vlans *v, u16 vid) set_bit(vid, v->vlan_bitmap); v->num_vlans++; + if (flags & BRIDGE_VLAN_INFO_PVID) + __vlan_add_pvid(v, vid); + return 0; } @@ -37,6 +61,8 @@ static int __vlan_del(struct net_port_vlans *v, u16 vid) if (!test_bit(vid, v->vlan_bitmap)) return -EINVAL; + __vlan_delete_pvid(v, vid); + if (v->port_idx && vid) { struct net_device *dev = v->parent.port->dev; @@ -58,6 +84,8 @@ static int __vlan_del(struct net_port_vlans *v, u16 vid) static void __vlan_flush(struct net_port_vlans *v) { + smp_wmb(); + v->pvid = 0; bitmap_zero(v->vlan_bitmap, BR_VLAN_BITMAP_LEN); if (v->port_idx) rcu_assign_pointer(v->parent.port->vlan_info, NULL); @@ -185,7 +213,7 @@ bool br_allowed_egress(struct net_bridge *br, } /* Must be protected by RTNL */ -int br_vlan_add(struct net_bridge *br, u16 vid) +int br_vlan_add(struct net_bridge *br, u16 vid, u16 flags) { struct net_port_vlans *pv = NULL; int err; @@ -194,7 +222,7 @@ int br_vlan_add(struct net_bridge *br, u16 vid) pv = rtnl_dereference(br->vlan_info); if (pv) - return __vlan_add(pv, vid); + return __vlan_add(pv, vid, flags); /* Create port vlan infomration */ @@ -203,7 +231,7 @@ int br_vlan_add(struct net_bridge *br, u16 vid) return -ENOMEM; pv->parent.br = br; - err = __vlan_add(pv, vid); + err = __vlan_add(pv, vid, flags); if (err) goto out; @@ -234,7 +262,6 @@ void br_vlan_flush(struct net_bridge *br) struct net_port_vlans *pv; ASSERT_RTNL(); - pv = rtnl_dereference(br->vlan_info); if (!pv) return; @@ -258,7 +285,7 @@ unlock: } /* Must be protected by RTNL */ -int nbp_vlan_add(struct net_bridge_port *port, u16 vid) +int nbp_vlan_add(struct net_bridge_port *port, u16 vid, u16 flags) { struct net_port_vlans *pv = NULL; int err; @@ -267,7 +294,7 @@ int nbp_vlan_add(struct net_bridge_port *port, u16 vid) pv = rtnl_dereference(port->vlan_info); if (pv) - return __vlan_add(pv, vid); + return __vlan_add(pv, vid, flags); /* Create port vlan infomration */ @@ -279,7 +306,7 @@ int nbp_vlan_add(struct net_bridge_port *port, u16 vid) pv->port_idx = port->port_no; pv->parent.port = port; - err = __vlan_add(pv, vid); + err = __vlan_add(pv, vid, flags); if (err) goto clean_up; -- cgit v1.2.3 From 1690be63a27b20ae65c792729a44f5970561ffa4 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Wed, 13 Feb 2013 12:00:18 +0000 Subject: bridge: Add vlan support to static neighbors When a user adds bridge neighbors, allow him to specify VLAN id. If the VLAN id is not specified, the neighbor will be added for VLANs currently in the ports filter list. If no VLANs are configured on the port, we use vlan 0 and only add 1 entry. Signed-off-by: Vlad Yasevich Acked-by: Jitendra Kalsaria Signed-off-by: David S. Miller --- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 2 +- drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 1 + drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c | 4 +- drivers/net/macvlan.c | 2 +- drivers/net/vxlan.c | 3 +- include/linux/netdevice.h | 4 +- include/uapi/linux/neighbour.h | 1 + net/bridge/br_fdb.c | 148 ++++++++++++++++++++--- net/bridge/br_private.h | 6 +- net/core/rtnetlink.c | 26 ++-- 10 files changed, 162 insertions(+), 35 deletions(-) (limited to 'include/uapi/linux') diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 4e2aa47193cb..1c0efcb7920f 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -7002,7 +7002,7 @@ static int ixgbe_ndo_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], return err; } -static int ixgbe_ndo_fdb_del(struct ndmsg *ndm, +static int ixgbe_ndo_fdb_del(struct ndmsg *ndm, struct nlattr *tb[], struct net_device *dev, const unsigned char *addr) { diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index 937bcc3d3212..5088dc5c3d1a 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -1959,6 +1959,7 @@ static int mlx4_en_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], } static int mlx4_en_fdb_del(struct ndmsg *ndm, + struct nlattr *tb[], struct net_device *dev, const unsigned char *addr) { diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c index b745194391a1..b95316831587 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c @@ -247,8 +247,8 @@ static int qlcnic_set_mac(struct net_device *netdev, void *p) return 0; } -static int qlcnic_fdb_del(struct ndmsg *ndm, struct net_device *netdev, - const unsigned char *addr) +static int qlcnic_fdb_del(struct ndmsg *ndm, struct nlattr *tb[], + struct net_device *netdev, const unsigned char *addr) { struct qlcnic_adapter *adapter = netdev_priv(netdev); int err = -EOPNOTSUPP; diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index e4b8078e88a9..defcd8a85744 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -599,7 +599,7 @@ static int macvlan_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], return err; } -static int macvlan_fdb_del(struct ndmsg *ndm, +static int macvlan_fdb_del(struct ndmsg *ndm, struct nlattr *tb[], struct net_device *dev, const unsigned char *addr) { diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 72485b9b9005..9d70421cf3a0 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -393,7 +393,8 @@ static int vxlan_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], } /* Delete entry (via netlink) */ -static int vxlan_fdb_delete(struct ndmsg *ndm, struct net_device *dev, +static int vxlan_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[], + struct net_device *dev, const unsigned char *addr) { struct vxlan_dev *vxlan = netdev_priv(dev); diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 1964ca66df56..9deb672d999f 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -884,7 +884,8 @@ struct netdev_fcoe_hbainfo { * struct net_device *dev, * const unsigned char *addr, u16 flags) * Adds an FDB entry to dev for addr. - * int (*ndo_fdb_del)(struct ndmsg *ndm, struct net_device *dev, + * int (*ndo_fdb_del)(struct ndmsg *ndm, struct nlattr *tb[], + * struct net_device *dev, * const unsigned char *addr) * Deletes the FDB entry from dev coresponding to addr. * int (*ndo_fdb_dump)(struct sk_buff *skb, struct netlink_callback *cb, @@ -1008,6 +1009,7 @@ struct net_device_ops { const unsigned char *addr, u16 flags); int (*ndo_fdb_del)(struct ndmsg *ndm, + struct nlattr *tb[], struct net_device *dev, const unsigned char *addr); int (*ndo_fdb_dump)(struct sk_buff *skb, diff --git a/include/uapi/linux/neighbour.h b/include/uapi/linux/neighbour.h index 275e5d65dcb2..adb068c53c4e 100644 --- a/include/uapi/linux/neighbour.h +++ b/include/uapi/linux/neighbour.h @@ -20,6 +20,7 @@ enum { NDA_LLADDR, NDA_CACHEINFO, NDA_PROBES, + NDA_VLAN, __NDA_MAX }; diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index 276a52254606..4b75ad43aa85 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -505,6 +505,10 @@ static int fdb_fill_info(struct sk_buff *skb, const struct net_bridge *br, ci.ndm_refcnt = 0; if (nla_put(skb, NDA_CACHEINFO, sizeof(ci), &ci)) goto nla_put_failure; + + if (nla_put(skb, NDA_VLAN, sizeof(u16), &fdb->vlan_id)) + goto nla_put_failure; + return nlmsg_end(skb, nlh); nla_put_failure: @@ -516,6 +520,7 @@ static inline size_t fdb_nlmsg_size(void) { return NLMSG_ALIGN(sizeof(struct ndmsg)) + nla_total_size(ETH_ALEN) /* NDA_LLADDR */ + + nla_total_size(sizeof(u16)) /* NDA_VLAN */ + nla_total_size(sizeof(struct nda_cacheinfo)); } @@ -617,6 +622,25 @@ static int fdb_add_entry(struct net_bridge_port *source, const __u8 *addr, return 0; } +static int __br_fdb_add(struct ndmsg *ndm, struct net_bridge_port *p, + const unsigned char *addr, u16 nlh_flags, u16 vid) +{ + int err = 0; + + if (ndm->ndm_flags & NTF_USE) { + rcu_read_lock(); + br_fdb_update(p->br, p, addr, vid); + rcu_read_unlock(); + } else { + spin_lock_bh(&p->br->hash_lock); + err = fdb_add_entry(p, addr, ndm->ndm_state, + nlh_flags, vid); + spin_unlock_bh(&p->br->hash_lock); + } + + return err; +} + /* Add new permanent fdb entry with RTM_NEWNEIGH */ int br_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], struct net_device *dev, @@ -624,12 +648,29 @@ int br_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], { struct net_bridge_port *p; int err = 0; + struct net_port_vlans *pv; + unsigned short vid = VLAN_N_VID; if (!(ndm->ndm_state & (NUD_PERMANENT|NUD_NOARP|NUD_REACHABLE))) { pr_info("bridge: RTM_NEWNEIGH with invalid state %#x\n", ndm->ndm_state); return -EINVAL; } + if (tb[NDA_VLAN]) { + if (nla_len(tb[NDA_VLAN]) != sizeof(unsigned short)) { + pr_info("bridge: RTM_NEWNEIGH with invalid vlan\n"); + return -EINVAL; + } + + vid = nla_get_u16(tb[NDA_VLAN]); + + if (vid >= VLAN_N_VID) { + pr_info("bridge: RTM_NEWNEIGH with invalid vlan id %d\n", + vid); + return -EINVAL; + } + } + p = br_port_get_rtnl(dev); if (p == NULL) { pr_info("bridge: RTM_NEWNEIGH %s not a bridge port\n", @@ -637,41 +678,90 @@ int br_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], return -EINVAL; } - if (ndm->ndm_flags & NTF_USE) { - rcu_read_lock(); - br_fdb_update(p->br, p, addr, 0); - rcu_read_unlock(); + pv = nbp_get_vlan_info(p); + if (vid != VLAN_N_VID) { + if (!pv || !test_bit(vid, pv->vlan_bitmap)) { + pr_info("bridge: RTM_NEWNEIGH with unconfigured " + "vlan %d on port %s\n", vid, dev->name); + return -EINVAL; + } + + /* VID was specified, so use it. */ + err = __br_fdb_add(ndm, p, addr, nlh_flags, vid); } else { - spin_lock_bh(&p->br->hash_lock); - err = fdb_add_entry(p, addr, ndm->ndm_state, nlh_flags, - 0); - spin_unlock_bh(&p->br->hash_lock); + if (!pv || bitmap_empty(pv->vlan_bitmap, BR_VLAN_BITMAP_LEN)) { + err = __br_fdb_add(ndm, p, addr, nlh_flags, 0); + goto out; + } + + /* We have vlans configured on this port and user didn't + * specify a VLAN. To be nice, add/update entry for every + * vlan on this port. + */ + vid = find_first_bit(pv->vlan_bitmap, BR_VLAN_BITMAP_LEN); + while (vid < BR_VLAN_BITMAP_LEN) { + err = __br_fdb_add(ndm, p, addr, nlh_flags, vid); + if (err) + goto out; + vid = find_next_bit(pv->vlan_bitmap, + BR_VLAN_BITMAP_LEN, vid+1); + } } +out: return err; } -static int fdb_delete_by_addr(struct net_bridge_port *p, const u8 *addr) +static int fdb_delete_by_addr(struct net_bridge *br, const u8 *addr, + u16 vlan) { - struct net_bridge *br = p->br; - struct hlist_head *head = &br->hash[br_mac_hash(addr, 0)]; + struct hlist_head *head = &br->hash[br_mac_hash(addr, vlan)]; struct net_bridge_fdb_entry *fdb; - fdb = fdb_find(head, addr, 0); + fdb = fdb_find(head, addr, vlan); if (!fdb) return -ENOENT; - fdb_delete(p->br, fdb); + fdb_delete(br, fdb); return 0; } +static int __br_fdb_delete(struct net_bridge_port *p, + const unsigned char *addr, u16 vid) +{ + int err; + + spin_lock_bh(&p->br->hash_lock); + err = fdb_delete_by_addr(p->br, addr, vid); + spin_unlock_bh(&p->br->hash_lock); + + return err; +} + /* Remove neighbor entry with RTM_DELNEIGH */ -int br_fdb_delete(struct ndmsg *ndm, struct net_device *dev, +int br_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[], + struct net_device *dev, const unsigned char *addr) { struct net_bridge_port *p; int err; + struct net_port_vlans *pv; + unsigned short vid = VLAN_N_VID; + if (tb[NDA_VLAN]) { + if (nla_len(tb[NDA_VLAN]) != sizeof(unsigned short)) { + pr_info("bridge: RTM_NEWNEIGH with invalid vlan\n"); + return -EINVAL; + } + + vid = nla_get_u16(tb[NDA_VLAN]); + + if (vid >= VLAN_N_VID) { + pr_info("bridge: RTM_NEWNEIGH with invalid vlan id %d\n", + vid); + return -EINVAL; + } + } p = br_port_get_rtnl(dev); if (p == NULL) { pr_info("bridge: RTM_DELNEIGH %s not a bridge port\n", @@ -679,9 +769,33 @@ int br_fdb_delete(struct ndmsg *ndm, struct net_device *dev, return -EINVAL; } - spin_lock_bh(&p->br->hash_lock); - err = fdb_delete_by_addr(p, addr); - spin_unlock_bh(&p->br->hash_lock); + pv = nbp_get_vlan_info(p); + if (vid != VLAN_N_VID) { + if (!pv || !test_bit(vid, pv->vlan_bitmap)) { + pr_info("bridge: RTM_DELNEIGH with unconfigured " + "vlan %d on port %s\n", vid, dev->name); + return -EINVAL; + } + err = __br_fdb_delete(p, addr, vid); + } else { + if (!pv || bitmap_empty(pv->vlan_bitmap, BR_VLAN_BITMAP_LEN)) { + err = __br_fdb_delete(p, addr, 0); + goto out; + } + + /* We have vlans configured on this port and user didn't + * specify a VLAN. To be nice, add/update entry for every + * vlan on this port. + */ + err = -ENOENT; + vid = find_first_bit(pv->vlan_bitmap, BR_VLAN_BITMAP_LEN); + while (vid < BR_VLAN_BITMAP_LEN) { + err &= __br_fdb_delete(p, addr, vid); + vid = find_next_bit(pv->vlan_bitmap, + BR_VLAN_BITMAP_LEN, vid+1); + } + } +out: return err; } diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 22915c8e9961..799dbb37e5a2 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -388,7 +388,7 @@ extern void br_fdb_update(struct net_bridge *br, const unsigned char *addr, u16 vid); -extern int br_fdb_delete(struct ndmsg *ndm, +extern int br_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[], struct net_device *dev, const unsigned char *addr); extern int br_fdb_add(struct ndmsg *nlh, struct nlattr *tb[], @@ -577,13 +577,13 @@ extern void nbp_vlan_flush(struct net_bridge_port *port); static inline struct net_port_vlans *br_get_vlan_info( const struct net_bridge *br) { - return rcu_dereference(br->vlan_info); + return rcu_dereference_rtnl(br->vlan_info); } static inline struct net_port_vlans *nbp_get_vlan_info( const struct net_bridge_port *p) { - return rcu_dereference(p->vlan_info); + return rcu_dereference_rtnl(p->vlan_info); } /* Since bridge now depends on 8021Q module, but the time bridge sees the diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index f3a112ec86d5..d8aa20f6a46e 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -2119,13 +2119,17 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) { struct net *net = sock_net(skb->sk); struct ndmsg *ndm; - struct nlattr *llattr; + struct nlattr *tb[NDA_MAX+1]; struct net_device *dev; int err = -EINVAL; __u8 *addr; - if (nlmsg_len(nlh) < sizeof(*ndm)) - return -EINVAL; + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + + err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL); + if (err < 0) + return err; ndm = nlmsg_data(nlh); if (ndm->ndm_ifindex == 0) { @@ -2139,13 +2143,17 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) return -ENODEV; } - llattr = nlmsg_find_attr(nlh, sizeof(*ndm), NDA_LLADDR); - if (llattr == NULL || nla_len(llattr) != ETH_ALEN) { - pr_info("PF_BRIGDE: RTM_DELNEIGH with invalid address\n"); + if (!tb[NDA_LLADDR] || nla_len(tb[NDA_LLADDR]) != ETH_ALEN) { + pr_info("PF_BRIDGE: RTM_DELNEIGH with invalid address\n"); + return -EINVAL; + } + + addr = nla_data(tb[NDA_LLADDR]); + if (!is_valid_ether_addr(addr)) { + pr_info("PF_BRIDGE: RTM_DELNEIGH with invalid ether address\n"); return -EINVAL; } - addr = nla_data(llattr); err = -EOPNOTSUPP; /* Support fdb on master device the net/bridge default case */ @@ -2155,7 +2163,7 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) const struct net_device_ops *ops = br_dev->netdev_ops; if (ops->ndo_fdb_del) - err = ops->ndo_fdb_del(ndm, dev, addr); + err = ops->ndo_fdb_del(ndm, tb, dev, addr); if (err) goto out; @@ -2165,7 +2173,7 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) /* Embedded bridge, macvlan, and any other device support */ if ((ndm->ndm_flags & NTF_SELF) && dev->netdev_ops->ndo_fdb_del) { - err = dev->netdev_ops->ndo_fdb_del(ndm, dev, addr); + err = dev->netdev_ops->ndo_fdb_del(ndm, tb, dev, addr); if (!err) { rtnl_fdb_notify(dev, addr, RTM_DELNEIGH); -- cgit v1.2.3 From 35e03f3a0275a1ba57e432d7c948cf6f70fbb37a Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Wed, 13 Feb 2013 12:00:20 +0000 Subject: bridge: Separate egress policy bitmap Add an ability to configure a separate "untagged" egress policy to the VLAN information of the bridge. This superseeds PVID policy and makes PVID ingress-only. The policy is configured with a new flag and is represented as a port bitmap per vlan. Egress frames with a VLAN id in "untagged" policy bitmap would egress the port without VLAN header. Signed-off-by: Vlad Yasevich Signed-off-by: David S. Miller --- include/uapi/linux/if_bridge.h | 1 + net/bridge/br_netlink.c | 4 ++++ net/bridge/br_private.h | 1 + net/bridge/br_vlan.c | 20 ++++++++++++++------ 4 files changed, 20 insertions(+), 6 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h index c6c30e28f396..f1bf8d34ac9f 100644 --- a/include/uapi/linux/if_bridge.h +++ b/include/uapi/linux/if_bridge.h @@ -121,6 +121,7 @@ enum { #define BRIDGE_VLAN_INFO_MASTER (1<<0) /* Operate on Bridge device as well */ #define BRIDGE_VLAN_INFO_PVID (1<<1) /* VLAN is PVID, ingress untagged */ +#define BRIDGE_VLAN_INFO_UNTAGGED (1<<2) /* VLAN egresses untagged */ struct bridge_vlan_info { u16 flags; diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index e044cc0b5650..d1dda476d743 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -143,6 +143,10 @@ static int br_fill_ifinfo(struct sk_buff *skb, vinfo.flags = 0; if (vid == pvid) vinfo.flags |= BRIDGE_VLAN_INFO_PVID; + + if (test_bit(vid, pv->untagged_bitmap)) + vinfo.flags |= BRIDGE_VLAN_INFO_UNTAGGED; + if (nla_put(skb, IFLA_BRIDGE_VLAN_INFO, sizeof(vinfo), &vinfo)) goto nla_put_failure; diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 32ecfa4ef47f..6d314c4e6bcb 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -75,6 +75,7 @@ struct net_port_vlans { } parent; struct rcu_head rcu; unsigned long vlan_bitmap[BR_VLAN_BITMAP_LEN]; + unsigned long untagged_bitmap[BR_VLAN_BITMAP_LEN]; u16 num_vlans; }; diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c index 9ea358fbbf78..93dde75923f0 100644 --- a/net/bridge/br_vlan.c +++ b/net/bridge/br_vlan.c @@ -23,6 +23,15 @@ static void __vlan_delete_pvid(struct net_port_vlans *v, u16 vid) v->pvid = 0; } +static void __vlan_add_flags(struct net_port_vlans *v, u16 vid, u16 flags) +{ + if (flags & BRIDGE_VLAN_INFO_PVID) + __vlan_add_pvid(v, vid); + + if (flags & BRIDGE_VLAN_INFO_UNTAGGED) + set_bit(vid, v->untagged_bitmap); +} + static int __vlan_add(struct net_port_vlans *v, u16 vid, u16 flags) { struct net_bridge_port *p = NULL; @@ -31,8 +40,7 @@ static int __vlan_add(struct net_port_vlans *v, u16 vid, u16 flags) int err; if (test_bit(vid, v->vlan_bitmap)) { - if (flags & BRIDGE_VLAN_INFO_PVID) - __vlan_add_pvid(v, vid); + __vlan_add_flags(v, vid, flags); return 0; } @@ -69,8 +77,7 @@ static int __vlan_add(struct net_port_vlans *v, u16 vid, u16 flags) set_bit(vid, v->vlan_bitmap); v->num_vlans++; - if (flags & BRIDGE_VLAN_INFO_PVID) - __vlan_add_pvid(v, vid); + __vlan_add_flags(v, vid, flags); return 0; @@ -86,6 +93,7 @@ static int __vlan_del(struct net_port_vlans *v, u16 vid) return -EINVAL; __vlan_delete_pvid(v, vid); + clear_bit(vid, v->untagged_bitmap); if (v->port_idx && vid) { struct net_device *dev = v->parent.port->dev; @@ -144,11 +152,11 @@ struct sk_buff *br_handle_vlan(struct net_bridge *br, goto out; /* At this point, we know that the frame was filtered and contains - * a valid vlan id. If the vlan id matches the pvid of current port + * a valid vlan id. If the vlan id is set in the untagged bitmap, * send untagged; otherwise, send taged. */ br_vlan_get_tag(skb, &vid); - if (vid == br_get_pvid(pv)) + if (test_bit(vid, pv->untagged_bitmap)) skb = br_vlan_untag(skb); else { /* Egress policy says "send tagged". If output device -- cgit v1.2.3 From 9f89ec82521957de807dc0d56264ee226bbe9b98 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Thu, 14 Feb 2013 13:32:31 +0800 Subject: bridge: use __u16 in if_bridge.h We should use "__u16" instead of "u16" in the user-space visable header. Cc: Vlad Yasevich Cc: Stephen Hemminger Cc: David S. Miller Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- include/uapi/linux/if_bridge.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h index f1bf8d34ac9f..2d70d79ce2fd 100644 --- a/include/uapi/linux/if_bridge.h +++ b/include/uapi/linux/if_bridge.h @@ -124,8 +124,8 @@ enum { #define BRIDGE_VLAN_INFO_UNTAGGED (1<<2) /* VLAN egresses untagged */ struct bridge_vlan_info { - u16 flags; - u16 vid; + __u16 flags; + __u16 vid; }; /* Bridge multicast database attributes -- cgit v1.2.3 From 04f39047af2a6df64b763ea5a271db24879d0391 Mon Sep 17 00:00:00 2001 From: Simon Wunderlich Date: Fri, 8 Feb 2013 18:16:19 +0100 Subject: nl80211/cfg80211: add radar detection command/event Add new NL80211_CMD_RADAR_DETECT, which starts the Channel Availability Check (CAC). This command will also notify the usermode about events (CAC finished, CAC aborted, radar detected, NOP finished). Once radar detection has started it should continuously monitor for radars as long as the channel is active. This patch enables DFS for AP mode in nl80211/cfg80211. Based on original patch by Victor Goldenshtein Signed-off-by: Simon Wunderlich [remove WIPHY_FLAG_HAS_RADAR_DETECT again -- my mistake] Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 46 +++++++++++++++ include/uapi/linux/nl80211.h | 61 +++++++++++++++++++ net/wireless/chan.c | 129 ++++++++++++++++++++++++++++++++++++++++- net/wireless/core.c | 3 + net/wireless/core.h | 28 +++++++++ net/wireless/mlme.c | 120 ++++++++++++++++++++++++++++++++++++++ net/wireless/nl80211.c | 135 +++++++++++++++++++++++++++++++++++++++++-- net/wireless/nl80211.h | 7 +++ net/wireless/reg.c | 3 + net/wireless/scan.c | 10 ---- net/wireless/trace.h | 45 +++++++++++++++ 11 files changed, 570 insertions(+), 17 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 7e6569e1f16f..ee11a3db730b 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -114,6 +114,9 @@ enum ieee80211_channel_flags { #define IEEE80211_CHAN_NO_HT40 \ (IEEE80211_CHAN_NO_HT40PLUS | IEEE80211_CHAN_NO_HT40MINUS) +#define IEEE80211_DFS_MIN_CAC_TIME_MS 60000 +#define IEEE80211_DFS_MIN_NOP_TIME_MS (30 * 60 * 1000) + /** * struct ieee80211_channel - channel definition * @@ -134,6 +137,9 @@ enum ieee80211_channel_flags { * to enable this, this is useful only on 5 GHz band. * @orig_mag: internal use * @orig_mpwr: internal use + * @dfs_state: current state of this channel. Only relevant if radar is required + * on this channel. + * @dfs_state_entered: timestamp (jiffies) when the dfs state was entered. */ struct ieee80211_channel { enum ieee80211_band band; @@ -146,6 +152,8 @@ struct ieee80211_channel { bool beacon_found; u32 orig_flags; int orig_mag, orig_mpwr; + enum nl80211_dfs_state dfs_state; + unsigned long dfs_state_entered; }; /** @@ -569,6 +577,7 @@ struct cfg80211_acl_data { * @p2p_opp_ps: P2P opportunistic PS * @acl: ACL configuration used by the drivers which has support for * MAC address based access control + * @radar_required: set if radar detection is required */ struct cfg80211_ap_settings { struct cfg80211_chan_def chandef; @@ -586,6 +595,7 @@ struct cfg80211_ap_settings { u8 p2p_ctwindow; bool p2p_opp_ps; const struct cfg80211_acl_data *acl; + bool radar_required; }; /** @@ -1909,6 +1919,8 @@ struct cfg80211_gtk_rekey_data { * this new list replaces the existing one. Driver has to clear its ACL * when number of MAC addresses entries is passed as 0. Drivers which * advertise the support for MAC based ACL have to implement this callback. + * + * @start_radar_detection: Start radar detection in the driver. */ struct cfg80211_ops { int (*suspend)(struct wiphy *wiphy, struct cfg80211_wowlan *wow); @@ -2132,6 +2144,10 @@ struct cfg80211_ops { int (*set_mac_acl)(struct wiphy *wiphy, struct net_device *dev, const struct cfg80211_acl_data *params); + + int (*start_radar_detection)(struct wiphy *wiphy, + struct net_device *dev, + struct cfg80211_chan_def *chandef); }; /* @@ -2715,6 +2731,8 @@ struct cfg80211_cached_keys; * beacons, 0 when not valid * @address: The address for this device, valid only if @netdev is %NULL * @p2p_started: true if this is a P2P Device that has been started + * @cac_started: true if DFS channel availability check has been started + * @cac_start_time: timestamp (jiffies) when the dfs state was entered. */ struct wireless_dev { struct wiphy *wiphy; @@ -2766,6 +2784,9 @@ struct wireless_dev { u32 ap_unexpected_nlportid; + bool cac_started; + unsigned long cac_start_time; + #ifdef CONFIG_CFG80211_WEXT /* wext data */ struct { @@ -3754,6 +3775,31 @@ void cfg80211_cqm_rssi_notify(struct net_device *dev, enum nl80211_cqm_rssi_threshold_event rssi_event, gfp_t gfp); +/** + * cfg80211_radar_event - radar detection event + * @wiphy: the wiphy + * @chandef: chandef for the current channel + * @gfp: context flags + * + * This function is called when a radar is detected on the current chanenl. + */ +void cfg80211_radar_event(struct wiphy *wiphy, + struct cfg80211_chan_def *chandef, gfp_t gfp); + +/** + * cfg80211_cac_event - Channel availability check (CAC) event + * @netdev: network device + * @event: type of event + * @gfp: context flags + * + * This function is called when a Channel availability check (CAC) is finished + * or aborted. This must be called to notify the completion of a CAC process, + * also by full-MAC drivers. + */ +void cfg80211_cac_event(struct net_device *netdev, + enum nl80211_radar_event event, gfp_t gfp); + + /** * cfg80211_cqm_pktloss_notify - notify userspace about packetloss to peer * @dev: network device diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 5309b34930ea..90b7af86f392 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -603,6 +603,14 @@ * command is used in AP/P2P GO mode. Driver has to make sure to clear its * ACL list during %NL80211_CMD_STOP_AP. * + * @NL80211_CMD_RADAR_DETECT: Start a Channel availability check (CAC). Once + * a radar is detected or the channel availability scan (CAC) has finished + * or was aborted, or a radar was detected, usermode will be notified with + * this event. This command is also used to notify userspace about radars + * while operating on this channel. + * %NL80211_ATTR_RADAR_EVENT is used to inform about the type of the + * event. + * * @NL80211_CMD_MAX: highest used command number * @__NL80211_CMD_AFTER_LAST: internal use */ @@ -755,6 +763,8 @@ enum nl80211_commands { NL80211_CMD_SET_MAC_ACL, + NL80211_CMD_RADAR_DETECT, + /* add new commands above here */ /* used to define NL80211_CMD_MAX below */ @@ -1342,6 +1352,9 @@ enum nl80211_commands { * number of MAC addresses that a device can support for MAC * ACL. * + * @NL80211_ATTR_RADAR_EVENT: Type of radar event for notification to userspace, + * contains a value of enum nl80211_radar_event (u32). + * * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use */ @@ -1620,6 +1633,8 @@ enum nl80211_attrs { NL80211_ATTR_MAC_ACL_MAX, + NL80211_ATTR_RADAR_EVENT, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, @@ -2022,6 +2037,10 @@ enum nl80211_band_attr { * on this channel in current regulatory domain. * @NL80211_FREQUENCY_ATTR_MAX_TX_POWER: Maximum transmission power in mBm * (100 * dBm). + * @NL80211_FREQUENCY_ATTR_DFS_STATE: current state for DFS + * (enum nl80211_dfs_state) + * @NL80211_FREQUENCY_ATTR_DFS_TIME: time in miliseconds for how long + * this channel is in this DFS state. * @NL80211_FREQUENCY_ATTR_MAX: highest frequency attribute number * currently defined * @__NL80211_FREQUENCY_ATTR_AFTER_LAST: internal use @@ -2034,6 +2053,8 @@ enum nl80211_frequency_attr { NL80211_FREQUENCY_ATTR_NO_IBSS, NL80211_FREQUENCY_ATTR_RADAR, NL80211_FREQUENCY_ATTR_MAX_TX_POWER, + NL80211_FREQUENCY_ATTR_DFS_STATE, + NL80211_FREQUENCY_ATTR_DFS_TIME, /* keep last */ __NL80211_FREQUENCY_ATTR_AFTER_LAST, @@ -3489,4 +3510,44 @@ enum nl80211_acl_policy { NL80211_ACL_POLICY_DENY_UNLESS_LISTED, }; +/** + * enum nl80211_radar_event - type of radar event for DFS operation + * + * Type of event to be used with NL80211_ATTR_RADAR_EVENT to inform userspace + * about detected radars or success of the channel available check (CAC) + * + * @NL80211_RADAR_DETECTED: A radar pattern has been detected. The channel is + * now unusable. + * @NL80211_RADAR_CAC_FINISHED: Channel Availability Check has been finished, + * the channel is now available. + * @NL80211_RADAR_CAC_ABORTED: Channel Availability Check has been aborted, no + * change to the channel status. + * @NL80211_RADAR_NOP_FINISHED: The Non-Occupancy Period for this channel is + * over, channel becomes usable. + */ +enum nl80211_radar_event { + NL80211_RADAR_DETECTED, + NL80211_RADAR_CAC_FINISHED, + NL80211_RADAR_CAC_ABORTED, + NL80211_RADAR_NOP_FINISHED, +}; + +/** + * enum nl80211_dfs_state - DFS states for channels + * + * Channel states used by the DFS code. + * + * @IEEE80211_DFS_USABLE: The channel can be used, but channel availability + * check (CAC) must be performed before using it for AP or IBSS. + * @IEEE80211_DFS_UNAVAILABLE: A radar has been detected on this channel, it + * is therefore marked as not available. + * @IEEE80211_DFS_AVAILABLE: The channel has been CAC checked and is available. + */ + +enum nl80211_dfs_state { + NL80211_DFS_USABLE, + NL80211_DFS_UNAVAILABLE, + NL80211_DFS_AVAILABLE, +}; + #endif /* __LINUX_NL80211_H */ diff --git a/net/wireless/chan.c b/net/wireless/chan.c index 396373f3ec26..810c23cfb894 100644 --- a/net/wireless/chan.c +++ b/net/wireless/chan.c @@ -147,6 +147,32 @@ static void chandef_primary_freqs(const struct cfg80211_chan_def *c, } } +static int cfg80211_chandef_get_width(const struct cfg80211_chan_def *c) +{ + int width; + + switch (c->width) { + case NL80211_CHAN_WIDTH_20: + case NL80211_CHAN_WIDTH_20_NOHT: + width = 20; + break; + case NL80211_CHAN_WIDTH_40: + width = 40; + break; + case NL80211_CHAN_WIDTH_80P80: + case NL80211_CHAN_WIDTH_80: + width = 80; + break; + case NL80211_CHAN_WIDTH_160: + width = 160; + break; + default: + WARN_ON_ONCE(1); + return -1; + } + return width; +} + const struct cfg80211_chan_def * cfg80211_chandef_compatible(const struct cfg80211_chan_def *c1, const struct cfg80211_chan_def *c2) @@ -192,6 +218,93 @@ cfg80211_chandef_compatible(const struct cfg80211_chan_def *c1, } EXPORT_SYMBOL(cfg80211_chandef_compatible); +static void cfg80211_set_chans_dfs_state(struct wiphy *wiphy, u32 center_freq, + u32 bandwidth, + enum nl80211_dfs_state dfs_state) +{ + struct ieee80211_channel *c; + u32 freq; + + for (freq = center_freq - bandwidth/2 + 10; + freq <= center_freq + bandwidth/2 - 10; + freq += 20) { + c = ieee80211_get_channel(wiphy, freq); + if (!c || !(c->flags & IEEE80211_CHAN_RADAR)) + continue; + + c->dfs_state = dfs_state; + c->dfs_state_entered = jiffies; + } +} + +void cfg80211_set_dfs_state(struct wiphy *wiphy, + const struct cfg80211_chan_def *chandef, + enum nl80211_dfs_state dfs_state) +{ + int width; + + if (WARN_ON(!cfg80211_chandef_valid(chandef))) + return; + + width = cfg80211_chandef_get_width(chandef); + if (width < 0) + return; + + cfg80211_set_chans_dfs_state(wiphy, chandef->center_freq1, + width, dfs_state); + + if (!chandef->center_freq2) + return; + cfg80211_set_chans_dfs_state(wiphy, chandef->center_freq2, + width, dfs_state); +} + +static int cfg80211_get_chans_dfs_required(struct wiphy *wiphy, + u32 center_freq, + u32 bandwidth) +{ + struct ieee80211_channel *c; + u32 freq; + + for (freq = center_freq - bandwidth/2 + 10; + freq <= center_freq + bandwidth/2 - 10; + freq += 20) { + c = ieee80211_get_channel(wiphy, freq); + if (!c) + return -EINVAL; + + if (c->flags & IEEE80211_CHAN_RADAR) + return 1; + } + return 0; +} + + +int cfg80211_chandef_dfs_required(struct wiphy *wiphy, + const struct cfg80211_chan_def *chandef) +{ + int width; + int r; + + if (WARN_ON(!cfg80211_chandef_valid(chandef))) + return -EINVAL; + + width = cfg80211_chandef_get_width(chandef); + if (width < 0) + return -EINVAL; + + r = cfg80211_get_chans_dfs_required(wiphy, chandef->center_freq1, + width); + if (r) + return r; + + if (!chandef->center_freq2) + return 0; + + return cfg80211_get_chans_dfs_required(wiphy, chandef->center_freq2, + width); +} + static bool cfg80211_secondary_chans_ok(struct wiphy *wiphy, u32 center_freq, u32 bandwidth, u32 prohibited_flags) @@ -203,7 +316,16 @@ static bool cfg80211_secondary_chans_ok(struct wiphy *wiphy, freq <= center_freq + bandwidth/2 - 10; freq += 20) { c = ieee80211_get_channel(wiphy, freq); - if (!c || c->flags & prohibited_flags) + if (!c) + return false; + + /* check for radar flags */ + if ((prohibited_flags & c->flags & IEEE80211_CHAN_RADAR) && + (c->dfs_state != NL80211_DFS_AVAILABLE)) + return false; + + /* check for the other flags */ + if (c->flags & prohibited_flags & ~IEEE80211_CHAN_RADAR) return false; } @@ -344,7 +466,10 @@ cfg80211_get_chan_state(struct wireless_dev *wdev, break; case NL80211_IFTYPE_AP: case NL80211_IFTYPE_P2P_GO: - if (wdev->beacon_interval) { + if (wdev->cac_started) { + *chan = wdev->channel; + *chanmode = CHAN_MODE_SHARED; + } else if (wdev->beacon_interval) { *chan = wdev->channel; *chanmode = CHAN_MODE_SHARED; } diff --git a/net/wireless/core.c b/net/wireless/core.c index f0a1bbe95cff..922002105062 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -324,6 +324,8 @@ struct wiphy *wiphy_new(const struct cfg80211_ops *ops, int sizeof_priv) INIT_LIST_HEAD(&rdev->bss_list); INIT_WORK(&rdev->scan_done_wk, __cfg80211_scan_done); INIT_WORK(&rdev->sched_scan_results_wk, __cfg80211_sched_scan_results); + INIT_DELAYED_WORK(&rdev->dfs_update_channels_wk, + cfg80211_dfs_channels_update_work); #ifdef CONFIG_CFG80211_WEXT rdev->wiphy.wext = &cfg80211_wext_handler; #endif @@ -695,6 +697,7 @@ void wiphy_unregister(struct wiphy *wiphy) flush_work(&rdev->scan_done_wk); cancel_work_sync(&rdev->conn_work); flush_work(&rdev->event_work); + cancel_delayed_work_sync(&rdev->dfs_update_channels_wk); if (rdev->wowlan && rdev->ops->set_wakeup) rdev_set_wakeup(rdev, false); diff --git a/net/wireless/core.h b/net/wireless/core.h index 949c9573d8d7..3aec0e429d8a 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -86,6 +86,8 @@ struct cfg80211_registered_device { struct cfg80211_wowlan *wowlan; + struct delayed_work dfs_update_channels_wk; + /* must be last because of the way we do wiphy_priv(), * and it should at least be aligned to NETDEV_ALIGN */ struct wiphy wiphy __aligned(NETDEV_ALIGN); @@ -431,6 +433,22 @@ int cfg80211_can_use_iftype_chan(struct cfg80211_registered_device *rdev, enum cfg80211_chan_mode chanmode, u8 radar_detect); +/** + * cfg80211_chandef_dfs_required - checks if radar detection is required + * @wiphy: the wiphy to validate against + * @chandef: the channel definition to check + * Return: 1 if radar detection is required, 0 if it is not, < 0 on error + */ +int cfg80211_chandef_dfs_required(struct wiphy *wiphy, + const struct cfg80211_chan_def *c); + +void cfg80211_set_dfs_state(struct wiphy *wiphy, + const struct cfg80211_chan_def *chandef, + enum nl80211_dfs_state dfs_state); + +void cfg80211_dfs_channels_update_work(struct work_struct *work); + + static inline int cfg80211_can_change_interface(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev, @@ -457,6 +475,16 @@ cfg80211_can_use_chan(struct cfg80211_registered_device *rdev, chan, chanmode, 0); } +static inline unsigned int elapsed_jiffies_msecs(unsigned long start) +{ + unsigned long end = jiffies; + + if (end >= start) + return jiffies_to_msecs(end - start); + + return jiffies_to_msecs(end + (MAX_JIFFY_OFFSET - start) + 1); +} + void cfg80211_get_chan_state(struct wireless_dev *wdev, struct ieee80211_channel **chan, diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c index 8e6920728c43..caddca35d686 100644 --- a/net/wireless/mlme.c +++ b/net/wireless/mlme.c @@ -987,3 +987,123 @@ void cfg80211_pmksa_candidate_notify(struct net_device *dev, int index, nl80211_pmksa_candidate_notify(rdev, dev, index, bssid, preauth, gfp); } EXPORT_SYMBOL(cfg80211_pmksa_candidate_notify); + +void cfg80211_dfs_channels_update_work(struct work_struct *work) +{ + struct delayed_work *delayed_work; + struct cfg80211_registered_device *rdev; + struct cfg80211_chan_def chandef; + struct ieee80211_supported_band *sband; + struct ieee80211_channel *c; + struct wiphy *wiphy; + bool check_again = false; + unsigned long timeout, next_time = 0; + int bandid, i; + + delayed_work = container_of(work, struct delayed_work, work); + rdev = container_of(delayed_work, struct cfg80211_registered_device, + dfs_update_channels_wk); + wiphy = &rdev->wiphy; + + mutex_lock(&cfg80211_mutex); + for (bandid = 0; bandid < IEEE80211_NUM_BANDS; bandid++) { + sband = wiphy->bands[bandid]; + if (!sband) + continue; + + for (i = 0; i < sband->n_channels; i++) { + c = &sband->channels[i]; + + if (c->dfs_state != NL80211_DFS_UNAVAILABLE) + continue; + + timeout = c->dfs_state_entered + + IEEE80211_DFS_MIN_NOP_TIME_MS; + + if (time_after_eq(jiffies, timeout)) { + c->dfs_state = NL80211_DFS_USABLE; + cfg80211_chandef_create(&chandef, c, + NL80211_CHAN_NO_HT); + + nl80211_radar_notify(rdev, &chandef, + NL80211_RADAR_NOP_FINISHED, + NULL, GFP_ATOMIC); + continue; + } + + if (!check_again) + next_time = timeout - jiffies; + else + next_time = min(next_time, timeout - jiffies); + check_again = true; + } + } + mutex_unlock(&cfg80211_mutex); + + /* reschedule if there are other channels waiting to be cleared again */ + if (check_again) + queue_delayed_work(cfg80211_wq, &rdev->dfs_update_channels_wk, + next_time); +} + + +void cfg80211_radar_event(struct wiphy *wiphy, + struct cfg80211_chan_def *chandef, + gfp_t gfp) +{ + struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + unsigned long timeout; + + trace_cfg80211_radar_event(wiphy, chandef); + + /* only set the chandef supplied channel to unavailable, in + * case the radar is detected on only one of multiple channels + * spanned by the chandef. + */ + cfg80211_set_dfs_state(wiphy, chandef, NL80211_DFS_UNAVAILABLE); + + timeout = msecs_to_jiffies(IEEE80211_DFS_MIN_NOP_TIME_MS); + queue_delayed_work(cfg80211_wq, &rdev->dfs_update_channels_wk, + timeout); + + nl80211_radar_notify(rdev, chandef, NL80211_RADAR_DETECTED, NULL, gfp); +} +EXPORT_SYMBOL(cfg80211_radar_event); + +void cfg80211_cac_event(struct net_device *netdev, + enum nl80211_radar_event event, gfp_t gfp) +{ + struct wireless_dev *wdev = netdev->ieee80211_ptr; + struct wiphy *wiphy = wdev->wiphy; + struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + struct cfg80211_chan_def chandef; + unsigned long timeout; + + trace_cfg80211_cac_event(netdev, event); + + if (WARN_ON(!wdev->cac_started)) + return; + + if (WARN_ON(!wdev->channel)) + return; + + cfg80211_chandef_create(&chandef, wdev->channel, NL80211_CHAN_NO_HT); + + switch (event) { + case NL80211_RADAR_CAC_FINISHED: + timeout = wdev->cac_start_time + + msecs_to_jiffies(IEEE80211_DFS_MIN_CAC_TIME_MS); + WARN_ON(!time_after_eq(jiffies, timeout)); + cfg80211_set_dfs_state(wiphy, &chandef, NL80211_DFS_AVAILABLE); + break; + case NL80211_RADAR_CAC_ABORTED: + break; + default: + WARN_ON(1); + return; + } + wdev->cac_started = false; + + nl80211_radar_notify(rdev, &chandef, event, netdev, gfp); +} +EXPORT_SYMBOL(cfg80211_cac_event); diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index d29a461b4981..c1e18ccf4049 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -552,9 +552,16 @@ static int nl80211_msg_put_channel(struct sk_buff *msg, if ((chan->flags & IEEE80211_CHAN_NO_IBSS) && nla_put_flag(msg, NL80211_FREQUENCY_ATTR_NO_IBSS)) goto nla_put_failure; - if ((chan->flags & IEEE80211_CHAN_RADAR) && - nla_put_flag(msg, NL80211_FREQUENCY_ATTR_RADAR)) - goto nla_put_failure; + if (chan->flags & IEEE80211_CHAN_RADAR) { + u32 time = elapsed_jiffies_msecs(chan->dfs_state_entered); + if (nla_put_flag(msg, NL80211_FREQUENCY_ATTR_RADAR)) + goto nla_put_failure; + if (nla_put_u32(msg, NL80211_FREQUENCY_ATTR_DFS_STATE, + chan->dfs_state)) + goto nla_put_failure; + if (nla_put_u32(msg, NL80211_FREQUENCY_ATTR_DFS_TIME, time)) + goto nla_put_failure; + } if (nla_put_u32(msg, NL80211_FREQUENCY_ATTR_MAX_TX_POWER, DBM_TO_MBM(chan->max_power))) @@ -2775,6 +2782,7 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info) struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_ap_settings params; int err; + u8 radar_detect_width = 0; if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP && dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_GO) @@ -2893,9 +2901,19 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info) if (!cfg80211_reg_can_beacon(&rdev->wiphy, ¶ms.chandef)) return -EINVAL; + err = cfg80211_chandef_dfs_required(wdev->wiphy, ¶ms.chandef); + if (err < 0) + return err; + if (err) { + radar_detect_width = BIT(params.chandef.width); + params.radar_required = true; + } + mutex_lock(&rdev->devlist_mtx); - err = cfg80211_can_use_chan(rdev, wdev, params.chandef.chan, - CHAN_MODE_SHARED); + err = cfg80211_can_use_iftype_chan(rdev, wdev, wdev->iftype, + params.chandef.chan, + CHAN_MODE_SHARED, + radar_detect_width); mutex_unlock(&rdev->devlist_mtx); if (err) @@ -5055,6 +5073,54 @@ static int nl80211_stop_sched_scan(struct sk_buff *skb, return err; } +static int nl80211_start_radar_detection(struct sk_buff *skb, + struct genl_info *info) +{ + struct cfg80211_registered_device *rdev = info->user_ptr[0]; + struct net_device *dev = info->user_ptr[1]; + struct wireless_dev *wdev = dev->ieee80211_ptr; + struct cfg80211_chan_def chandef; + int err; + + err = nl80211_parse_chandef(rdev, info, &chandef); + if (err) + return err; + + if (wdev->cac_started) + return -EBUSY; + + err = cfg80211_chandef_dfs_required(wdev->wiphy, &chandef); + if (err < 0) + return err; + + if (err == 0) + return -EINVAL; + + if (chandef.chan->dfs_state != NL80211_DFS_USABLE) + return -EINVAL; + + if (!rdev->ops->start_radar_detection) + return -EOPNOTSUPP; + + mutex_lock(&rdev->devlist_mtx); + err = cfg80211_can_use_iftype_chan(rdev, wdev, wdev->iftype, + chandef.chan, CHAN_MODE_SHARED, + BIT(chandef.width)); + if (err) + goto err_locked; + + err = rdev->ops->start_radar_detection(&rdev->wiphy, dev, &chandef); + if (!err) { + wdev->channel = chandef.chan; + wdev->cac_started = true; + wdev->cac_start_time = jiffies; + } +err_locked: + mutex_unlock(&rdev->devlist_mtx); + + return err; +} + static int nl80211_send_bss(struct sk_buff *msg, struct netlink_callback *cb, u32 seq, int flags, struct cfg80211_registered_device *rdev, @@ -8305,6 +8371,14 @@ static struct genl_ops nl80211_ops[] = { .internal_flags = NL80211_FLAG_NEED_NETDEV | NL80211_FLAG_NEED_RTNL, }, + { + .cmd = NL80211_CMD_RADAR_DETECT, + .doit = nl80211_start_radar_detection, + .policy = nl80211_policy, + .flags = GENL_ADMIN_PERM, + .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | + NL80211_FLAG_NEED_RTNL, + }, }; static struct genl_multicast_group nl80211_mlme_mcgrp = { @@ -9501,6 +9575,57 @@ nl80211_send_cqm_txe_notify(struct cfg80211_registered_device *rdev, nlmsg_free(msg); } +void +nl80211_radar_notify(struct cfg80211_registered_device *rdev, + struct cfg80211_chan_def *chandef, + enum nl80211_radar_event event, + struct net_device *netdev, gfp_t gfp) +{ + struct sk_buff *msg; + void *hdr; + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp); + if (!msg) + return; + + hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_RADAR_DETECT); + if (!hdr) { + nlmsg_free(msg); + return; + } + + if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx)) + goto nla_put_failure; + + /* NOP and radar events don't need a netdev parameter */ + if (netdev) { + struct wireless_dev *wdev = netdev->ieee80211_ptr; + + if (nla_put_u32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex) || + nla_put_u64(msg, NL80211_ATTR_WDEV, wdev_id(wdev))) + goto nla_put_failure; + } + + if (nla_put_u32(msg, NL80211_ATTR_RADAR_EVENT, event)) + goto nla_put_failure; + + if (nl80211_send_chandef(msg, chandef)) + goto nla_put_failure; + + if (genlmsg_end(msg, hdr) < 0) { + nlmsg_free(msg); + return; + } + + genlmsg_multicast_netns(wiphy_net(&rdev->wiphy), msg, 0, + nl80211_mlme_mcgrp.id, gfp); + return; + + nla_put_failure: + genlmsg_cancel(msg, hdr); + nlmsg_free(msg); +} + void nl80211_send_cqm_pktloss_notify(struct cfg80211_registered_device *rdev, struct net_device *netdev, const u8 *peer, diff --git a/net/wireless/nl80211.h b/net/wireless/nl80211.h index 2acba8477e9d..b061da4919e1 100644 --- a/net/wireless/nl80211.h +++ b/net/wireless/nl80211.h @@ -108,6 +108,13 @@ nl80211_send_cqm_rssi_notify(struct cfg80211_registered_device *rdev, struct net_device *netdev, enum nl80211_cqm_rssi_threshold_event rssi_event, gfp_t gfp); + +void +nl80211_radar_notify(struct cfg80211_registered_device *rdev, + struct cfg80211_chan_def *chandef, + enum nl80211_radar_event event, + struct net_device *netdev, gfp_t gfp); + void nl80211_send_cqm_pktloss_notify(struct cfg80211_registered_device *rdev, struct net_device *netdev, const u8 *peer, diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 08d3da2c70ab..e97d5b071ab6 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -884,6 +884,9 @@ static void handle_channel(struct wiphy *wiphy, return; } + chan->dfs_state = NL80211_DFS_USABLE; + chan->dfs_state_entered = jiffies; + chan->beacon_found = false; chan->flags = flags | bw_flags | map_regdom_flags(reg_rule->flags); chan->max_antenna_gain = diff --git a/net/wireless/scan.c b/net/wireless/scan.c index d0fc6da2d097..f0d9b5154bab 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -1210,16 +1210,6 @@ static void ieee80211_scan_add_ies(struct iw_request_info *info, } } -static inline unsigned int elapsed_jiffies_msecs(unsigned long start) -{ - unsigned long end = jiffies; - - if (end >= start) - return jiffies_to_msecs(end - start); - - return jiffies_to_msecs(end + (MAX_JIFFY_OFFSET - start) + 1); -} - static char * ieee80211_bss(struct wiphy *wiphy, struct iw_request_info *info, struct cfg80211_internal_bss *bss, char *current_ev, diff --git a/net/wireless/trace.h b/net/wireless/trace.h index c9cafb0ea95f..b7a531380e19 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -2051,6 +2051,21 @@ TRACE_EVENT(cfg80211_reg_can_beacon, WIPHY_PR_ARG, CHAN_DEF_PR_ARG) ); +TRACE_EVENT(cfg80211_chandef_dfs_required, + TP_PROTO(struct wiphy *wiphy, struct cfg80211_chan_def *chandef), + TP_ARGS(wiphy, chandef), + TP_STRUCT__entry( + WIPHY_ENTRY + CHAN_DEF_ENTRY + ), + TP_fast_assign( + WIPHY_ASSIGN; + CHAN_DEF_ASSIGN(chandef); + ), + TP_printk(WIPHY_PR_FMT ", " CHAN_DEF_PR_FMT, + WIPHY_PR_ARG, CHAN_DEF_PR_ARG) +); + TRACE_EVENT(cfg80211_ch_switch_notify, TP_PROTO(struct net_device *netdev, struct cfg80211_chan_def *chandef), @@ -2067,6 +2082,36 @@ TRACE_EVENT(cfg80211_ch_switch_notify, NETDEV_PR_ARG, CHAN_DEF_PR_ARG) ); +TRACE_EVENT(cfg80211_radar_event, + TP_PROTO(struct wiphy *wiphy, struct cfg80211_chan_def *chandef), + TP_ARGS(wiphy, chandef), + TP_STRUCT__entry( + WIPHY_ENTRY + CHAN_DEF_ENTRY + ), + TP_fast_assign( + WIPHY_ASSIGN; + CHAN_DEF_ASSIGN(chandef); + ), + TP_printk(WIPHY_PR_FMT ", " CHAN_DEF_PR_FMT, + WIPHY_PR_ARG, CHAN_DEF_PR_ARG) +); + +TRACE_EVENT(cfg80211_cac_event, + TP_PROTO(struct net_device *netdev, enum nl80211_radar_event evt), + TP_ARGS(netdev, evt), + TP_STRUCT__entry( + NETDEV_ENTRY + __field(enum nl80211_radar_event, evt) + ), + TP_fast_assign( + NETDEV_ASSIGN; + __entry->evt = evt; + ), + TP_printk(NETDEV_PR_FMT ", event: %d", + NETDEV_PR_ARG, __entry->evt) +); + DECLARE_EVENT_CLASS(cfg80211_rx_evt, TP_PROTO(struct net_device *netdev, const u8 *addr), TP_ARGS(netdev, addr), -- cgit v1.2.3 From 50640f169372b9977487a328dedf13a8debedff7 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 12 Dec 2012 17:59:39 +0100 Subject: nl80211: advertise HT/VHT channel limitations When drivers or regulatory have limitations on 40, 80 or 160 MHz channels, advertise these to userspace via nl80211. Also add a new feature flag to let userspace know this is supported. Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 17 +++++++++++++++++ net/wireless/core.c | 3 ++- net/wireless/nl80211.c | 12 ++++++++++++ 3 files changed, 31 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 90b7af86f392..3880f6ad7ed1 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -2041,6 +2041,16 @@ enum nl80211_band_attr { * (enum nl80211_dfs_state) * @NL80211_FREQUENCY_ATTR_DFS_TIME: time in miliseconds for how long * this channel is in this DFS state. + * @NL80211_FREQUENCY_ATTR_NO_HT40_MINUS: HT40- isn't possible with this + * channel as the control channel + * @NL80211_FREQUENCY_ATTR_NO_HT40_PLUS: HT40+ isn't possible with this + * channel as the control channel + * @NL80211_FREQUENCY_ATTR_NO_80MHZ: any 80 MHz channel using this channel + * as the primary or any of the secondary channels isn't possible, + * this includes 80+80 channels + * @NL80211_FREQUENCY_ATTR_NO_160MHZ: any 160 MHz (but not 80+80) channel + * using this channel as the primary or any of the secondary channels + * isn't possible * @NL80211_FREQUENCY_ATTR_MAX: highest frequency attribute number * currently defined * @__NL80211_FREQUENCY_ATTR_AFTER_LAST: internal use @@ -2055,6 +2065,10 @@ enum nl80211_frequency_attr { NL80211_FREQUENCY_ATTR_MAX_TX_POWER, NL80211_FREQUENCY_ATTR_DFS_STATE, NL80211_FREQUENCY_ATTR_DFS_TIME, + NL80211_FREQUENCY_ATTR_NO_HT40_MINUS, + NL80211_FREQUENCY_ATTR_NO_HT40_PLUS, + NL80211_FREQUENCY_ATTR_NO_80MHZ, + NL80211_FREQUENCY_ATTR_NO_160MHZ, /* keep last */ __NL80211_FREQUENCY_ATTR_AFTER_LAST, @@ -3421,6 +3435,8 @@ enum nl80211_ap_sme_features { * Note that even for drivers that support this, the default is to add * stations in authenticated/associated state, so to add unauthenticated * stations the authenticated/associated bits have to be set in the mask. + * @NL80211_FEATURE_ADVERTISE_CHAN_LIMITS: cfg80211 advertises channel limits + * (HT40, VHT 80/160 MHz) if this flag is set */ enum nl80211_feature_flags { NL80211_FEATURE_SK_TX_STATUS = 1 << 0, @@ -3437,6 +3453,7 @@ enum nl80211_feature_flags { NL80211_FEATURE_P2P_GO_CTWIN = 1 << 11, NL80211_FEATURE_P2P_GO_OPPPS = 1 << 12, NL80211_FEATURE_FULL_AP_CLIENT_STATE = 1 << 13, + NL80211_FEATURE_ADVERTISE_CHAN_LIMITS = 1 << 14, }; /** diff --git a/net/wireless/core.c b/net/wireless/core.c index 922002105062..33b75b9b8efa 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -367,7 +367,8 @@ struct wiphy *wiphy_new(const struct cfg80211_ops *ops, int sizeof_priv) rdev->wiphy.rts_threshold = (u32) -1; rdev->wiphy.coverage_class = 0; - rdev->wiphy.features = NL80211_FEATURE_SCAN_FLUSH; + rdev->wiphy.features = NL80211_FEATURE_SCAN_FLUSH | + NL80211_FEATURE_ADVERTISE_CHAN_LIMITS; return &rdev->wiphy; } diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index c1e18ccf4049..7e40b9e82b45 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -562,6 +562,18 @@ static int nl80211_msg_put_channel(struct sk_buff *msg, if (nla_put_u32(msg, NL80211_FREQUENCY_ATTR_DFS_TIME, time)) goto nla_put_failure; } + if ((chan->flags & IEEE80211_CHAN_NO_HT40MINUS) && + nla_put_flag(msg, NL80211_FREQUENCY_ATTR_NO_HT40_MINUS)) + goto nla_put_failure; + if ((chan->flags & IEEE80211_CHAN_NO_HT40PLUS) && + nla_put_flag(msg, NL80211_FREQUENCY_ATTR_NO_HT40_PLUS)) + goto nla_put_failure; + if ((chan->flags & IEEE80211_CHAN_NO_80MHZ) && + nla_put_flag(msg, NL80211_FREQUENCY_ATTR_NO_80MHZ)) + goto nla_put_failure; + if ((chan->flags & IEEE80211_CHAN_NO_160MHZ) && + nla_put_flag(msg, NL80211_FREQUENCY_ATTR_NO_160MHZ)) + goto nla_put_failure; if (nla_put_u32(msg, NL80211_FREQUENCY_ATTR_MAX_TX_POWER, DBM_TO_MBM(chan->max_power))) -- cgit v1.2.3 From a50df0c4c0d97170a6c43573612acacc43e62fe7 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 11 Feb 2013 14:20:05 +0100 Subject: cfg80211: advertise extended capabilities to userspace In many cases, userspace may need to know which of the 802.11 extended capabilities ("Extended Capabilities element") are implemented in the driver or device, to include them e.g. in beacons, assoc request/response or other frames. Add a new nl80211 attribute to hold the extended capabilities bitmap for this. Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 11 +++++++++++ include/uapi/linux/nl80211.h | 9 +++++++++ net/wireless/nl80211.c | 9 +++++++++ 3 files changed, 29 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 8a9200f2f4a4..a229046d86d4 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -2480,6 +2480,14 @@ struct wiphy_wowlan_support { * * @max_acl_mac_addrs: Maximum number of MAC addresses that the device * supports for ACL. + * + * @extended_capabilities: extended capabilities supported by the driver, + * additional capabilities might be supported by userspace; these are + * the 802.11 extended capabilities ("Extended Capabilities element") + * and are in the same format as in the information element. See + * 802.11-2012 8.4.2.29 for the defined fields. + * @extended_capabilities_mask: mask of the valid values + * @extended_capabilities_len: length of the extended capabilities */ struct wiphy { /* assign these fields before you register the wiphy */ @@ -2546,6 +2554,9 @@ struct wiphy { */ u32 probe_resp_offload; + const u8 *extended_capabilities, *extended_capabilities_mask; + u8 extended_capabilities_len; + /* If multiple wiphys are registered and you're handed e.g. * a regular netdev with assigned ieee80211_ptr, you won't * know whether it points to a wiphy your driver has registered diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 3880f6ad7ed1..1fd6e5611896 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -1355,6 +1355,12 @@ enum nl80211_commands { * @NL80211_ATTR_RADAR_EVENT: Type of radar event for notification to userspace, * contains a value of enum nl80211_radar_event (u32). * + * @NL80211_ATTR_EXT_CAPA: 802.11 extended capabilities that the kernel driver + * has and handles. The format is the same as the IE contents. See + * 802.11-2012 8.4.2.29 for more information. + * @NL80211_ATTR_EXT_CAPA_MASK: Extended capabilities that the kernel driver + * has set in the %NL80211_ATTR_EXT_CAPA value, for multibit fields. + * * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use */ @@ -1635,6 +1641,9 @@ enum nl80211_attrs { NL80211_ATTR_RADAR_EVENT, + NL80211_ATTR_EXT_CAPA, + NL80211_ATTR_EXT_CAPA_MASK, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 7e40b9e82b45..1237431c3efa 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -1363,6 +1363,15 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 portid, u32 seq, int flag dev->wiphy.max_acl_mac_addrs)) goto nla_put_failure; + if (dev->wiphy.extended_capabilities && + (nla_put(msg, NL80211_ATTR_EXT_CAPA, + dev->wiphy.extended_capabilities_len, + dev->wiphy.extended_capabilities) || + nla_put(msg, NL80211_ATTR_EXT_CAPA_MASK, + dev->wiphy.extended_capabilities_len, + dev->wiphy.extended_capabilities_mask))) + goto nla_put_failure; + return genlmsg_end(msg, hdr); nla_put_failure: -- cgit v1.2.3 From 9d62a98617298c1da288f50e84c5dd67732e79b7 Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Thu, 14 Feb 2013 21:10:13 +0200 Subject: cfg80211: Pass station (extended) capability info to kernel The information of the peer's capabilities and extended capabilities are required for the driver to perform TDLS Peer UAPSD operations and off channel operations. This information of the peer is passed from user space using NL80211_CMD_SET_STATION command. This commit enhances the function nl80211_set_station to pass the capability information of the peer to the driver. Similarly, there may be need for capability information for other modes, so allow this to be provided with both add_station and change_station. Signed-off-by: Jouni Malinen Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 8 ++++++++ include/uapi/linux/nl80211.h | 10 ++++++++++ net/wireless/nl80211.c | 36 ++++++++++++++++++++++++++++++++++++ 3 files changed, 54 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index a229046d86d4..fa2612952c19 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -626,12 +626,14 @@ enum plink_actions { /** * enum station_parameters_apply_mask - station parameter values to apply * @STATION_PARAM_APPLY_UAPSD: apply new uAPSD parameters (uapsd_queues, max_sp) + * @STATION_PARAM_APPLY_CAPABILITY: apply new capability * * Not all station parameters have in-band "no change" signalling, * for those that don't these flags will are used. */ enum station_parameters_apply_mask { STATION_PARAM_APPLY_UAPSD = BIT(0), + STATION_PARAM_APPLY_CAPABILITY = BIT(1), }; /** @@ -662,6 +664,9 @@ enum station_parameters_apply_mask { * see &enum station_parameters_apply_mask * @local_pm: local link-specific mesh power save mode (no change when set * to unknown) + * @capability: station capability + * @ext_capab: extended capabilities of the station + * @ext_capab_len: number of extended capabilities */ struct station_parameters { u8 *supported_rates; @@ -678,6 +683,9 @@ struct station_parameters { u8 uapsd_queues; u8 max_sp; enum nl80211_mesh_power_mode local_pm; + u16 capability; + u8 *ext_capab; + u8 ext_capab_len; }; /** diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 1fd6e5611896..f7c35ca01efc 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -1361,6 +1361,13 @@ enum nl80211_commands { * @NL80211_ATTR_EXT_CAPA_MASK: Extended capabilities that the kernel driver * has set in the %NL80211_ATTR_EXT_CAPA value, for multibit fields. * + * @NL80211_ATTR_STA_CAPABILITY: Station capabilities (u16) are advertised to + * the driver, e.g., to enable TDLS power save (PU-APSD). + * + * @NL80211_ATTR_STA_EXT_CAPABILITY: Station extended capabilities are + * advertised to the driver, e.g., to enable TDLS off channel operations + * and PU-APSD. + * * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use */ @@ -1644,6 +1651,9 @@ enum nl80211_attrs { NL80211_ATTR_EXT_CAPA, NL80211_ATTR_EXT_CAPA_MASK, + NL80211_ATTR_STA_CAPABILITY, + NL80211_ATTR_STA_EXT_CAPABILITY, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 1237431c3efa..be9f2b5a403f 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -368,6 +368,8 @@ static const struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] = { [NL80211_ATTR_P2P_OPPPS] = { .type = NLA_U8 }, [NL80211_ATTR_ACL_POLICY] = {. type = NLA_U32 }, [NL80211_ATTR_MAC_ADDRS] = { .type = NLA_NESTED }, + [NL80211_ATTR_STA_CAPABILITY] = { .type = NLA_U16 }, + [NL80211_ATTR_STA_EXT_CAPABILITY] = { .type = NLA_BINARY, }, }; /* policy for the key attributes */ @@ -3435,6 +3437,19 @@ static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info) nla_len(info->attrs[NL80211_ATTR_STA_SUPPORTED_RATES]); } + if (info->attrs[NL80211_ATTR_STA_CAPABILITY]) { + params.capability = + nla_get_u16(info->attrs[NL80211_ATTR_STA_CAPABILITY]); + params.sta_modify_mask |= STATION_PARAM_APPLY_CAPABILITY; + } + + if (info->attrs[NL80211_ATTR_STA_EXT_CAPABILITY]) { + params.ext_capab = + nla_data(info->attrs[NL80211_ATTR_STA_EXT_CAPABILITY]); + params.ext_capab_len = + nla_len(info->attrs[NL80211_ATTR_STA_EXT_CAPABILITY]); + } + if (info->attrs[NL80211_ATTR_STA_LISTEN_INTERVAL] || info->attrs[NL80211_ATTR_HT_CAPABILITY]) return -EINVAL; @@ -3505,6 +3520,10 @@ static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info) /* reject other things that can't change */ if (params.supported_rates) return -EINVAL; + if (info->attrs[NL80211_ATTR_STA_CAPABILITY]) + return -EINVAL; + if (info->attrs[NL80211_ATTR_STA_EXT_CAPABILITY]) + return -EINVAL; /* must be last in here for error handling */ params.vlan = get_vlan(info, rdev); @@ -3537,6 +3556,10 @@ static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info) return -EINVAL; if (params.supported_rates) return -EINVAL; + if (info->attrs[NL80211_ATTR_STA_CAPABILITY]) + return -EINVAL; + if (info->attrs[NL80211_ATTR_STA_EXT_CAPABILITY]) + return -EINVAL; /* * No special handling for TDLS here -- the userspace * mesh code doesn't have this bug. @@ -3601,6 +3624,19 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info) if (!params.aid || params.aid > IEEE80211_MAX_AID) return -EINVAL; + if (info->attrs[NL80211_ATTR_STA_CAPABILITY]) { + params.capability = + nla_get_u16(info->attrs[NL80211_ATTR_STA_CAPABILITY]); + params.sta_modify_mask |= STATION_PARAM_APPLY_CAPABILITY; + } + + if (info->attrs[NL80211_ATTR_STA_EXT_CAPABILITY]) { + params.ext_capab = + nla_data(info->attrs[NL80211_ATTR_STA_EXT_CAPABILITY]); + params.ext_capab_len = + nla_len(info->attrs[NL80211_ATTR_STA_EXT_CAPABILITY]); + } + if (info->attrs[NL80211_ATTR_HT_CAPABILITY]) params.ht_capa = nla_data(info->attrs[NL80211_ATTR_HT_CAPABILITY]); -- cgit v1.2.3 From 932dd97c5fef091dd6f605fb1d40143d67d91e09 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 14 Feb 2013 11:56:13 +0100 Subject: nl80211: renumber NL80211_FEATURE_FULL_AP_CLIENT_STATE Adding the flag to mac80211 already without testing was clearly a mistake, one that we now pay for by having to reserve bit 13 forever. The problem is cfg80211 doesn't allow capability/rate changes for station entries that were added unassociated, so the station entries cannot be set up properly when marked associated. Change the NL80211_FEATURE_FULL_AP_CLIENT_STATE value to make it clear to userspace implementations that all current kernels don't actually support it, even though the previous bit is set, and of course also remove the flag from mac80211 until we test and fix the issues. Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 3 ++- net/mac80211/main.c | 3 +-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index f7c35ca01efc..c46bb016f4e4 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -3471,8 +3471,9 @@ enum nl80211_feature_flags { NL80211_FEATURE_NEED_OBSS_SCAN = 1 << 10, NL80211_FEATURE_P2P_GO_CTWIN = 1 << 11, NL80211_FEATURE_P2P_GO_OPPPS = 1 << 12, - NL80211_FEATURE_FULL_AP_CLIENT_STATE = 1 << 13, + /* bit 13 is reserved */ NL80211_FEATURE_ADVERTISE_CHAN_LIMITS = 1 << 14, + NL80211_FEATURE_FULL_AP_CLIENT_STATE = 1 << 15, }; /** diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 035344bc6b9c..f9747689d604 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -572,8 +572,7 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len, wiphy->features |= NL80211_FEATURE_SK_TX_STATUS | NL80211_FEATURE_SAE | NL80211_FEATURE_HT_IBSS | - NL80211_FEATURE_VIF_TXPOWER | - NL80211_FEATURE_FULL_AP_CLIENT_STATE; + NL80211_FEATURE_VIF_TXPOWER; if (!ops->hw_scan) wiphy->features |= NL80211_FEATURE_LOW_PRIORITY_SCAN | -- cgit v1.2.3 From 84237a826b261de7ddd3d09ee53ee68cb4138937 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Mon, 18 Feb 2013 10:11:13 -0700 Subject: vfio-pci: Add support for VGA region access PCI defines display class VGA regions at I/O port address 0x3b0, 0x3c0 and MMIO address 0xa0000. As these are non-overlapping, we can ignore the I/O port vs MMIO difference and expose them both in a single region. We make use of the VGA arbiter around each access to configure chipset access as necessary. Signed-off-by: Alex Williamson --- drivers/vfio/pci/Kconfig | 10 ++++++ drivers/vfio/pci/vfio_pci.c | 18 +++++++++++ drivers/vfio/pci/vfio_pci_private.h | 4 +++ drivers/vfio/pci/vfio_pci_rdwr.c | 61 +++++++++++++++++++++++++++++++++++++ include/uapi/linux/vfio.h | 9 ++++++ 5 files changed, 102 insertions(+) (limited to 'include/uapi/linux') diff --git a/drivers/vfio/pci/Kconfig b/drivers/vfio/pci/Kconfig index 5980758563eb..e84300b268b6 100644 --- a/drivers/vfio/pci/Kconfig +++ b/drivers/vfio/pci/Kconfig @@ -6,3 +6,13 @@ config VFIO_PCI use of PCI drivers using the VFIO framework. If you don't know what to do here, say N. + +config VFIO_PCI_VGA + bool "VFIO PCI support for VGA devices" + depends on VFIO_PCI && X86 && VGA_ARB && EXPERIMENTAL + help + Support for VGA extension to VFIO PCI. This exposes an additional + region on VGA devices for accessing legacy VGA addresses used by + BIOS and generic video drivers. + + If you don't know what to do here, say N. diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c index bb8c8c2be960..8189cb6a86af 100644 --- a/drivers/vfio/pci/vfio_pci.c +++ b/drivers/vfio/pci/vfio_pci.c @@ -84,6 +84,11 @@ static int vfio_pci_enable(struct vfio_pci_device *vdev) } else vdev->msix_bar = 0xFF; +#ifdef CONFIG_VFIO_PCI_VGA + if ((pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA) + vdev->has_vga = true; +#endif + return 0; } @@ -285,6 +290,16 @@ static long vfio_pci_ioctl(void *device_data, info.flags = VFIO_REGION_INFO_FLAG_READ; break; } + case VFIO_PCI_VGA_REGION_INDEX: + if (!vdev->has_vga) + return -EINVAL; + + info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index); + info.size = 0xc0000; + info.flags = VFIO_REGION_INFO_FLAG_READ | + VFIO_REGION_INFO_FLAG_WRITE; + + break; default: return -EINVAL; } @@ -386,6 +401,9 @@ static ssize_t vfio_pci_rw(void *device_data, char __user *buf, case VFIO_PCI_BAR0_REGION_INDEX ... VFIO_PCI_BAR5_REGION_INDEX: return vfio_pci_bar_rw(vdev, buf, count, ppos, iswrite); + + case VFIO_PCI_VGA_REGION_INDEX: + return vfio_pci_vga_rw(vdev, buf, count, ppos, iswrite); } return -EINVAL; diff --git a/drivers/vfio/pci/vfio_pci_private.h b/drivers/vfio/pci/vfio_pci_private.h index 00d19b953ce4..d7e55d03f49e 100644 --- a/drivers/vfio/pci/vfio_pci_private.h +++ b/drivers/vfio/pci/vfio_pci_private.h @@ -53,6 +53,7 @@ struct vfio_pci_device { bool reset_works; bool extended_caps; bool bardirty; + bool has_vga; struct pci_saved_state *pci_saved_state; atomic_t refcnt; }; @@ -77,6 +78,9 @@ extern ssize_t vfio_pci_config_rw(struct vfio_pci_device *vdev, extern ssize_t vfio_pci_bar_rw(struct vfio_pci_device *vdev, char __user *buf, size_t count, loff_t *ppos, bool iswrite); +extern ssize_t vfio_pci_vga_rw(struct vfio_pci_device *vdev, char __user *buf, + size_t count, loff_t *ppos, bool iswrite); + extern int vfio_pci_init_perm_bits(void); extern void vfio_pci_uninit_perm_bits(void); diff --git a/drivers/vfio/pci/vfio_pci_rdwr.c b/drivers/vfio/pci/vfio_pci_rdwr.c index e9d78eb91ed7..210db24d2204 100644 --- a/drivers/vfio/pci/vfio_pci_rdwr.c +++ b/drivers/vfio/pci/vfio_pci_rdwr.c @@ -17,6 +17,7 @@ #include #include #include +#include #include "vfio_pci_private.h" @@ -175,3 +176,63 @@ ssize_t vfio_pci_bar_rw(struct vfio_pci_device *vdev, char __user *buf, return done; } + +ssize_t vfio_pci_vga_rw(struct vfio_pci_device *vdev, char __user *buf, + size_t count, loff_t *ppos, bool iswrite) +{ + int ret; + loff_t off, pos = *ppos & VFIO_PCI_OFFSET_MASK; + void __iomem *iomem = NULL; + unsigned int rsrc; + bool is_ioport; + ssize_t done; + + if (!vdev->has_vga) + return -EINVAL; + + switch (pos) { + case 0xa0000 ... 0xbffff: + count = min(count, (size_t)(0xc0000 - pos)); + iomem = ioremap_nocache(0xa0000, 0xbffff - 0xa0000 + 1); + off = pos - 0xa0000; + rsrc = VGA_RSRC_LEGACY_MEM; + is_ioport = false; + break; + case 0x3b0 ... 0x3bb: + count = min(count, (size_t)(0x3bc - pos)); + iomem = ioport_map(0x3b0, 0x3bb - 0x3b0 + 1); + off = pos - 0x3b0; + rsrc = VGA_RSRC_LEGACY_IO; + is_ioport = true; + break; + case 0x3c0 ... 0x3df: + count = min(count, (size_t)(0x3e0 - pos)); + iomem = ioport_map(0x3c0, 0x3df - 0x3c0 + 1); + off = pos - 0x3c0; + rsrc = VGA_RSRC_LEGACY_IO; + is_ioport = true; + break; + default: + return -EINVAL; + } + + if (!iomem) + return -ENOMEM; + + ret = vga_get_interruptible(vdev->pdev, rsrc); + if (ret) { + is_ioport ? ioport_unmap(iomem) : iounmap(iomem); + return ret; + } + + done = do_io_rw(iomem, buf, off, count, 0, 0, iswrite); + + vga_put(vdev->pdev, rsrc); + + is_ioport ? ioport_unmap(iomem) : iounmap(iomem); + + if (done >= 0) + *ppos += done; + + return done; +} diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h index 4758d1bfcf41..4f41f309911e 100644 --- a/include/uapi/linux/vfio.h +++ b/include/uapi/linux/vfio.h @@ -303,6 +303,15 @@ enum { VFIO_PCI_BAR5_REGION_INDEX, VFIO_PCI_ROM_REGION_INDEX, VFIO_PCI_CONFIG_REGION_INDEX, + /* + * Expose VGA regions defined for PCI base class 03, subclass 00. + * This includes I/O port ranges 0x3b0 to 0x3bb and 0x3c0 to 0x3df + * as well as the MMIO range 0xa0000 to 0xbffff. Each implemented + * range is found at it's identity mapped offset from the region + * offset, for example 0x3b0 is region_info.offset + 0x3b0. Areas + * between described ranges are unimplemented. + */ + VFIO_PCI_VGA_REGION_INDEX, VFIO_PCI_NUM_REGIONS }; -- cgit v1.2.3 From 5b8ca5344f82e594e21c9fbbdf3b13507ecdb5a2 Mon Sep 17 00:00:00 2001 From: Andy King Date: Mon, 18 Feb 2013 06:04:12 +0000 Subject: VSOCK: Remove hypervisor-only socket option Remove hypervisor-only socket option. Reported-by: Gerd Hoffmann Acked-by: Dmitry Torokhov Signed-off-by: Andy King Signed-off-by: David S. Miller --- include/uapi/linux/vm_sockets.h | 8 -------- 1 file changed, 8 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/vm_sockets.h b/include/uapi/linux/vm_sockets.h index f7f2e99dec84..df91301847ec 100644 --- a/include/uapi/linux/vm_sockets.h +++ b/include/uapi/linux/vm_sockets.h @@ -52,14 +52,6 @@ #define SO_VM_SOCKETS_PEER_HOST_VM_ID 3 -/* Option name for socket's service label. Use as the option name in - * setsockopt(3) or getsockopt(3) to set or get the service label for a socket. - * The service label is a C-style NUL-terminated string. Only available for - * hypervisor endpoints. - */ - -#define SO_VM_SOCKETS_SERVICE_LABEL 4 - /* Option name for determining if a socket is trusted. Use as the option name * in getsockopt(3) to determine if a socket is trusted. The value is a * signed integer. -- cgit v1.2.3 From 55e301fd57a6239ec14b91a1cf2e70b3dd135194 Mon Sep 17 00:00:00 2001 From: Filipe Brandenburger Date: Tue, 29 Jan 2013 06:04:50 +0000 Subject: Btrfs: move fs/btrfs/ioctl.h to include/uapi/linux/btrfs.h The header file will then be installed under /usr/include/linux so that userspace applications can refer to Btrfs ioctls by name and use the same structs used internally in the kernel. Signed-off-by: Filipe Brandenburger Signed-off-by: Josef Bacik --- fs/btrfs/backref.h | 2 +- fs/btrfs/ctree.h | 2 +- fs/btrfs/file.c | 2 +- fs/btrfs/inode.c | 2 +- fs/btrfs/ioctl.c | 2 +- fs/btrfs/ioctl.h | 502 -------------------------------------------- fs/btrfs/qgroup.c | 2 +- fs/btrfs/super.c | 2 +- fs/btrfs/volumes.h | 2 +- include/linux/btrfs.h | 6 + include/uapi/linux/Kbuild | 1 + include/uapi/linux/btrfs.h | 503 +++++++++++++++++++++++++++++++++++++++++++++ 12 files changed, 518 insertions(+), 510 deletions(-) delete mode 100644 fs/btrfs/ioctl.h create mode 100644 include/linux/btrfs.h create mode 100644 include/uapi/linux/btrfs.h (limited to 'include/uapi/linux') diff --git a/fs/btrfs/backref.h b/fs/btrfs/backref.h index d61feca79455..310a7f6d09b1 100644 --- a/fs/btrfs/backref.h +++ b/fs/btrfs/backref.h @@ -19,7 +19,7 @@ #ifndef __BTRFS_BACKREF__ #define __BTRFS_BACKREF__ -#include "ioctl.h" +#include #include "ulist.h" #include "extent_io.h" diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 541ce9a9949e..69321013683c 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -31,10 +31,10 @@ #include #include #include +#include #include "extent_io.h" #include "extent_map.h" #include "async-thread.h" -#include "ioctl.h" struct btrfs_trans_handle; struct btrfs_transaction; diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 083abca56055..13c78ea3ebce 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -30,11 +30,11 @@ #include #include #include +#include #include "ctree.h" #include "disk-io.h" #include "transaction.h" #include "btrfs_inode.h" -#include "ioctl.h" #include "print-tree.h" #include "tree-log.h" #include "locking.h" diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 60ec7589900c..fc8aa8bf80a1 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -39,12 +39,12 @@ #include #include #include +#include #include "compat.h" #include "ctree.h" #include "disk-io.h" #include "transaction.h" #include "btrfs_inode.h" -#include "ioctl.h" #include "print-tree.h" #include "ordered-data.h" #include "xattr.h" diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 1b554b47e814..96ecefc1724f 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -42,12 +42,12 @@ #include #include #include +#include #include "compat.h" #include "ctree.h" #include "disk-io.h" #include "transaction.h" #include "btrfs_inode.h" -#include "ioctl.h" #include "print-tree.h" #include "volumes.h" #include "locking.h" diff --git a/fs/btrfs/ioctl.h b/fs/btrfs/ioctl.h deleted file mode 100644 index dabca9cc8c2e..000000000000 --- a/fs/btrfs/ioctl.h +++ /dev/null @@ -1,502 +0,0 @@ -/* - * Copyright (C) 2007 Oracle. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License v2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. - */ - -#ifndef __IOCTL_ -#define __IOCTL_ -#include - -#define BTRFS_IOCTL_MAGIC 0x94 -#define BTRFS_VOL_NAME_MAX 255 - -/* this should be 4k */ -#define BTRFS_PATH_NAME_MAX 4087 -struct btrfs_ioctl_vol_args { - __s64 fd; - char name[BTRFS_PATH_NAME_MAX + 1]; -}; - -#define BTRFS_DEVICE_PATH_NAME_MAX 1024 - -#define BTRFS_SUBVOL_CREATE_ASYNC (1ULL << 0) -#define BTRFS_SUBVOL_RDONLY (1ULL << 1) -#define BTRFS_SUBVOL_QGROUP_INHERIT (1ULL << 2) -#define BTRFS_FSID_SIZE 16 -#define BTRFS_UUID_SIZE 16 - -#define BTRFS_QGROUP_INHERIT_SET_LIMITS (1ULL << 0) - -struct btrfs_qgroup_limit { - __u64 flags; - __u64 max_rfer; - __u64 max_excl; - __u64 rsv_rfer; - __u64 rsv_excl; -}; - -struct btrfs_qgroup_inherit { - __u64 flags; - __u64 num_qgroups; - __u64 num_ref_copies; - __u64 num_excl_copies; - struct btrfs_qgroup_limit lim; - __u64 qgroups[0]; -}; - -struct btrfs_ioctl_qgroup_limit_args { - __u64 qgroupid; - struct btrfs_qgroup_limit lim; -}; - -#define BTRFS_SUBVOL_NAME_MAX 4039 -struct btrfs_ioctl_vol_args_v2 { - __s64 fd; - __u64 transid; - __u64 flags; - union { - struct { - __u64 size; - struct btrfs_qgroup_inherit __user *qgroup_inherit; - }; - __u64 unused[4]; - }; - char name[BTRFS_SUBVOL_NAME_MAX + 1]; -}; - -/* - * structure to report errors and progress to userspace, either as a - * result of a finished scrub, a canceled scrub or a progress inquiry - */ -struct btrfs_scrub_progress { - __u64 data_extents_scrubbed; /* # of data extents scrubbed */ - __u64 tree_extents_scrubbed; /* # of tree extents scrubbed */ - __u64 data_bytes_scrubbed; /* # of data bytes scrubbed */ - __u64 tree_bytes_scrubbed; /* # of tree bytes scrubbed */ - __u64 read_errors; /* # of read errors encountered (EIO) */ - __u64 csum_errors; /* # of failed csum checks */ - __u64 verify_errors; /* # of occurences, where the metadata - * of a tree block did not match the - * expected values, like generation or - * logical */ - __u64 no_csum; /* # of 4k data block for which no csum - * is present, probably the result of - * data written with nodatasum */ - __u64 csum_discards; /* # of csum for which no data was found - * in the extent tree. */ - __u64 super_errors; /* # of bad super blocks encountered */ - __u64 malloc_errors; /* # of internal kmalloc errors. These - * will likely cause an incomplete - * scrub */ - __u64 uncorrectable_errors; /* # of errors where either no intact - * copy was found or the writeback - * failed */ - __u64 corrected_errors; /* # of errors corrected */ - __u64 last_physical; /* last physical address scrubbed. In - * case a scrub was aborted, this can - * be used to restart the scrub */ - __u64 unverified_errors; /* # of occurences where a read for a - * full (64k) bio failed, but the re- - * check succeeded for each 4k piece. - * Intermittent error. */ -}; - -#define BTRFS_SCRUB_READONLY 1 -struct btrfs_ioctl_scrub_args { - __u64 devid; /* in */ - __u64 start; /* in */ - __u64 end; /* in */ - __u64 flags; /* in */ - struct btrfs_scrub_progress progress; /* out */ - /* pad to 1k */ - __u64 unused[(1024-32-sizeof(struct btrfs_scrub_progress))/8]; -}; - -#define BTRFS_IOCTL_DEV_REPLACE_CONT_READING_FROM_SRCDEV_MODE_ALWAYS 0 -#define BTRFS_IOCTL_DEV_REPLACE_CONT_READING_FROM_SRCDEV_MODE_AVOID 1 -struct btrfs_ioctl_dev_replace_start_params { - __u64 srcdevid; /* in, if 0, use srcdev_name instead */ - __u64 cont_reading_from_srcdev_mode; /* in, see #define - * above */ - __u8 srcdev_name[BTRFS_DEVICE_PATH_NAME_MAX + 1]; /* in */ - __u8 tgtdev_name[BTRFS_DEVICE_PATH_NAME_MAX + 1]; /* in */ -}; - -#define BTRFS_IOCTL_DEV_REPLACE_STATE_NEVER_STARTED 0 -#define BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED 1 -#define BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED 2 -#define BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED 3 -#define BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED 4 -struct btrfs_ioctl_dev_replace_status_params { - __u64 replace_state; /* out, see #define above */ - __u64 progress_1000; /* out, 0 <= x <= 1000 */ - __u64 time_started; /* out, seconds since 1-Jan-1970 */ - __u64 time_stopped; /* out, seconds since 1-Jan-1970 */ - __u64 num_write_errors; /* out */ - __u64 num_uncorrectable_read_errors; /* out */ -}; - -#define BTRFS_IOCTL_DEV_REPLACE_CMD_START 0 -#define BTRFS_IOCTL_DEV_REPLACE_CMD_STATUS 1 -#define BTRFS_IOCTL_DEV_REPLACE_CMD_CANCEL 2 -#define BTRFS_IOCTL_DEV_REPLACE_RESULT_NO_ERROR 0 -#define BTRFS_IOCTL_DEV_REPLACE_RESULT_NOT_STARTED 1 -#define BTRFS_IOCTL_DEV_REPLACE_RESULT_ALREADY_STARTED 2 -struct btrfs_ioctl_dev_replace_args { - __u64 cmd; /* in */ - __u64 result; /* out */ - - union { - struct btrfs_ioctl_dev_replace_start_params start; - struct btrfs_ioctl_dev_replace_status_params status; - }; /* in/out */ - - __u64 spare[64]; -}; - -struct btrfs_ioctl_dev_info_args { - __u64 devid; /* in/out */ - __u8 uuid[BTRFS_UUID_SIZE]; /* in/out */ - __u64 bytes_used; /* out */ - __u64 total_bytes; /* out */ - __u64 unused[379]; /* pad to 4k */ - __u8 path[BTRFS_DEVICE_PATH_NAME_MAX]; /* out */ -}; - -struct btrfs_ioctl_fs_info_args { - __u64 max_id; /* out */ - __u64 num_devices; /* out */ - __u8 fsid[BTRFS_FSID_SIZE]; /* out */ - __u64 reserved[124]; /* pad to 1k */ -}; - -/* balance control ioctl modes */ -#define BTRFS_BALANCE_CTL_PAUSE 1 -#define BTRFS_BALANCE_CTL_CANCEL 2 - -/* - * this is packed, because it should be exactly the same as its disk - * byte order counterpart (struct btrfs_disk_balance_args) - */ -struct btrfs_balance_args { - __u64 profiles; - __u64 usage; - __u64 devid; - __u64 pstart; - __u64 pend; - __u64 vstart; - __u64 vend; - - __u64 target; - - __u64 flags; - - __u64 unused[8]; -} __attribute__ ((__packed__)); - -/* report balance progress to userspace */ -struct btrfs_balance_progress { - __u64 expected; /* estimated # of chunks that will be - * relocated to fulfill the request */ - __u64 considered; /* # of chunks we have considered so far */ - __u64 completed; /* # of chunks relocated so far */ -}; - -#define BTRFS_BALANCE_STATE_RUNNING (1ULL << 0) -#define BTRFS_BALANCE_STATE_PAUSE_REQ (1ULL << 1) -#define BTRFS_BALANCE_STATE_CANCEL_REQ (1ULL << 2) - -struct btrfs_ioctl_balance_args { - __u64 flags; /* in/out */ - __u64 state; /* out */ - - struct btrfs_balance_args data; /* in/out */ - struct btrfs_balance_args meta; /* in/out */ - struct btrfs_balance_args sys; /* in/out */ - - struct btrfs_balance_progress stat; /* out */ - - __u64 unused[72]; /* pad to 1k */ -}; - -#define BTRFS_INO_LOOKUP_PATH_MAX 4080 -struct btrfs_ioctl_ino_lookup_args { - __u64 treeid; - __u64 objectid; - char name[BTRFS_INO_LOOKUP_PATH_MAX]; -}; - -struct btrfs_ioctl_search_key { - /* which root are we searching. 0 is the tree of tree roots */ - __u64 tree_id; - - /* keys returned will be >= min and <= max */ - __u64 min_objectid; - __u64 max_objectid; - - /* keys returned will be >= min and <= max */ - __u64 min_offset; - __u64 max_offset; - - /* max and min transids to search for */ - __u64 min_transid; - __u64 max_transid; - - /* keys returned will be >= min and <= max */ - __u32 min_type; - __u32 max_type; - - /* - * how many items did userland ask for, and how many are we - * returning - */ - __u32 nr_items; - - /* align to 64 bits */ - __u32 unused; - - /* some extra for later */ - __u64 unused1; - __u64 unused2; - __u64 unused3; - __u64 unused4; -}; - -struct btrfs_ioctl_search_header { - __u64 transid; - __u64 objectid; - __u64 offset; - __u32 type; - __u32 len; -}; - -#define BTRFS_SEARCH_ARGS_BUFSIZE (4096 - sizeof(struct btrfs_ioctl_search_key)) -/* - * the buf is an array of search headers where - * each header is followed by the actual item - * the type field is expanded to 32 bits for alignment - */ -struct btrfs_ioctl_search_args { - struct btrfs_ioctl_search_key key; - char buf[BTRFS_SEARCH_ARGS_BUFSIZE]; -}; - -struct btrfs_ioctl_clone_range_args { - __s64 src_fd; - __u64 src_offset, src_length; - __u64 dest_offset; -}; - -/* flags for the defrag range ioctl */ -#define BTRFS_DEFRAG_RANGE_COMPRESS 1 -#define BTRFS_DEFRAG_RANGE_START_IO 2 - -struct btrfs_ioctl_space_info { - __u64 flags; - __u64 total_bytes; - __u64 used_bytes; -}; - -struct btrfs_ioctl_space_args { - __u64 space_slots; - __u64 total_spaces; - struct btrfs_ioctl_space_info spaces[0]; -}; - -struct btrfs_data_container { - __u32 bytes_left; /* out -- bytes not needed to deliver output */ - __u32 bytes_missing; /* out -- additional bytes needed for result */ - __u32 elem_cnt; /* out */ - __u32 elem_missed; /* out */ - __u64 val[0]; /* out */ -}; - -struct btrfs_ioctl_ino_path_args { - __u64 inum; /* in */ - __u64 size; /* in */ - __u64 reserved[4]; - /* struct btrfs_data_container *fspath; out */ - __u64 fspath; /* out */ -}; - -struct btrfs_ioctl_logical_ino_args { - __u64 logical; /* in */ - __u64 size; /* in */ - __u64 reserved[4]; - /* struct btrfs_data_container *inodes; out */ - __u64 inodes; -}; - -enum btrfs_dev_stat_values { - /* disk I/O failure stats */ - BTRFS_DEV_STAT_WRITE_ERRS, /* EIO or EREMOTEIO from lower layers */ - BTRFS_DEV_STAT_READ_ERRS, /* EIO or EREMOTEIO from lower layers */ - BTRFS_DEV_STAT_FLUSH_ERRS, /* EIO or EREMOTEIO from lower layers */ - - /* stats for indirect indications for I/O failures */ - BTRFS_DEV_STAT_CORRUPTION_ERRS, /* checksum error, bytenr error or - * contents is illegal: this is an - * indication that the block was damaged - * during read or write, or written to - * wrong location or read from wrong - * location */ - BTRFS_DEV_STAT_GENERATION_ERRS, /* an indication that blocks have not - * been written */ - - BTRFS_DEV_STAT_VALUES_MAX -}; - -/* Reset statistics after reading; needs SYS_ADMIN capability */ -#define BTRFS_DEV_STATS_RESET (1ULL << 0) - -struct btrfs_ioctl_get_dev_stats { - __u64 devid; /* in */ - __u64 nr_items; /* in/out */ - __u64 flags; /* in/out */ - - /* out values: */ - __u64 values[BTRFS_DEV_STAT_VALUES_MAX]; - - __u64 unused[128 - 2 - BTRFS_DEV_STAT_VALUES_MAX]; /* pad to 1k */ -}; - -#define BTRFS_QUOTA_CTL_ENABLE 1 -#define BTRFS_QUOTA_CTL_DISABLE 2 -#define BTRFS_QUOTA_CTL_RESCAN 3 -struct btrfs_ioctl_quota_ctl_args { - __u64 cmd; - __u64 status; -}; - -struct btrfs_ioctl_qgroup_assign_args { - __u64 assign; - __u64 src; - __u64 dst; -}; - -struct btrfs_ioctl_qgroup_create_args { - __u64 create; - __u64 qgroupid; -}; -struct btrfs_ioctl_timespec { - __u64 sec; - __u32 nsec; -}; - -struct btrfs_ioctl_received_subvol_args { - char uuid[BTRFS_UUID_SIZE]; /* in */ - __u64 stransid; /* in */ - __u64 rtransid; /* out */ - struct btrfs_ioctl_timespec stime; /* in */ - struct btrfs_ioctl_timespec rtime; /* out */ - __u64 flags; /* in */ - __u64 reserved[16]; /* in */ -}; - -struct btrfs_ioctl_send_args { - __s64 send_fd; /* in */ - __u64 clone_sources_count; /* in */ - __u64 __user *clone_sources; /* in */ - __u64 parent_root; /* in */ - __u64 flags; /* in */ - __u64 reserved[4]; /* in */ -}; - -#define BTRFS_IOC_SNAP_CREATE _IOW(BTRFS_IOCTL_MAGIC, 1, \ - struct btrfs_ioctl_vol_args) -#define BTRFS_IOC_DEFRAG _IOW(BTRFS_IOCTL_MAGIC, 2, \ - struct btrfs_ioctl_vol_args) -#define BTRFS_IOC_RESIZE _IOW(BTRFS_IOCTL_MAGIC, 3, \ - struct btrfs_ioctl_vol_args) -#define BTRFS_IOC_SCAN_DEV _IOW(BTRFS_IOCTL_MAGIC, 4, \ - struct btrfs_ioctl_vol_args) -/* trans start and trans end are dangerous, and only for - * use by applications that know how to avoid the - * resulting deadlocks - */ -#define BTRFS_IOC_TRANS_START _IO(BTRFS_IOCTL_MAGIC, 6) -#define BTRFS_IOC_TRANS_END _IO(BTRFS_IOCTL_MAGIC, 7) -#define BTRFS_IOC_SYNC _IO(BTRFS_IOCTL_MAGIC, 8) - -#define BTRFS_IOC_CLONE _IOW(BTRFS_IOCTL_MAGIC, 9, int) -#define BTRFS_IOC_ADD_DEV _IOW(BTRFS_IOCTL_MAGIC, 10, \ - struct btrfs_ioctl_vol_args) -#define BTRFS_IOC_RM_DEV _IOW(BTRFS_IOCTL_MAGIC, 11, \ - struct btrfs_ioctl_vol_args) -#define BTRFS_IOC_BALANCE _IOW(BTRFS_IOCTL_MAGIC, 12, \ - struct btrfs_ioctl_vol_args) - -#define BTRFS_IOC_CLONE_RANGE _IOW(BTRFS_IOCTL_MAGIC, 13, \ - struct btrfs_ioctl_clone_range_args) - -#define BTRFS_IOC_SUBVOL_CREATE _IOW(BTRFS_IOCTL_MAGIC, 14, \ - struct btrfs_ioctl_vol_args) -#define BTRFS_IOC_SNAP_DESTROY _IOW(BTRFS_IOCTL_MAGIC, 15, \ - struct btrfs_ioctl_vol_args) -#define BTRFS_IOC_DEFRAG_RANGE _IOW(BTRFS_IOCTL_MAGIC, 16, \ - struct btrfs_ioctl_defrag_range_args) -#define BTRFS_IOC_TREE_SEARCH _IOWR(BTRFS_IOCTL_MAGIC, 17, \ - struct btrfs_ioctl_search_args) -#define BTRFS_IOC_INO_LOOKUP _IOWR(BTRFS_IOCTL_MAGIC, 18, \ - struct btrfs_ioctl_ino_lookup_args) -#define BTRFS_IOC_DEFAULT_SUBVOL _IOW(BTRFS_IOCTL_MAGIC, 19, u64) -#define BTRFS_IOC_SPACE_INFO _IOWR(BTRFS_IOCTL_MAGIC, 20, \ - struct btrfs_ioctl_space_args) -#define BTRFS_IOC_START_SYNC _IOR(BTRFS_IOCTL_MAGIC, 24, __u64) -#define BTRFS_IOC_WAIT_SYNC _IOW(BTRFS_IOCTL_MAGIC, 22, __u64) -#define BTRFS_IOC_SNAP_CREATE_V2 _IOW(BTRFS_IOCTL_MAGIC, 23, \ - struct btrfs_ioctl_vol_args_v2) -#define BTRFS_IOC_SUBVOL_CREATE_V2 _IOW(BTRFS_IOCTL_MAGIC, 24, \ - struct btrfs_ioctl_vol_args_v2) -#define BTRFS_IOC_SUBVOL_GETFLAGS _IOR(BTRFS_IOCTL_MAGIC, 25, __u64) -#define BTRFS_IOC_SUBVOL_SETFLAGS _IOW(BTRFS_IOCTL_MAGIC, 26, __u64) -#define BTRFS_IOC_SCRUB _IOWR(BTRFS_IOCTL_MAGIC, 27, \ - struct btrfs_ioctl_scrub_args) -#define BTRFS_IOC_SCRUB_CANCEL _IO(BTRFS_IOCTL_MAGIC, 28) -#define BTRFS_IOC_SCRUB_PROGRESS _IOWR(BTRFS_IOCTL_MAGIC, 29, \ - struct btrfs_ioctl_scrub_args) -#define BTRFS_IOC_DEV_INFO _IOWR(BTRFS_IOCTL_MAGIC, 30, \ - struct btrfs_ioctl_dev_info_args) -#define BTRFS_IOC_FS_INFO _IOR(BTRFS_IOCTL_MAGIC, 31, \ - struct btrfs_ioctl_fs_info_args) -#define BTRFS_IOC_BALANCE_V2 _IOWR(BTRFS_IOCTL_MAGIC, 32, \ - struct btrfs_ioctl_balance_args) -#define BTRFS_IOC_BALANCE_CTL _IOW(BTRFS_IOCTL_MAGIC, 33, int) -#define BTRFS_IOC_BALANCE_PROGRESS _IOR(BTRFS_IOCTL_MAGIC, 34, \ - struct btrfs_ioctl_balance_args) -#define BTRFS_IOC_INO_PATHS _IOWR(BTRFS_IOCTL_MAGIC, 35, \ - struct btrfs_ioctl_ino_path_args) -#define BTRFS_IOC_LOGICAL_INO _IOWR(BTRFS_IOCTL_MAGIC, 36, \ - struct btrfs_ioctl_ino_path_args) -#define BTRFS_IOC_SET_RECEIVED_SUBVOL _IOWR(BTRFS_IOCTL_MAGIC, 37, \ - struct btrfs_ioctl_received_subvol_args) -#define BTRFS_IOC_SEND _IOW(BTRFS_IOCTL_MAGIC, 38, struct btrfs_ioctl_send_args) -#define BTRFS_IOC_DEVICES_READY _IOR(BTRFS_IOCTL_MAGIC, 39, \ - struct btrfs_ioctl_vol_args) -#define BTRFS_IOC_QUOTA_CTL _IOWR(BTRFS_IOCTL_MAGIC, 40, \ - struct btrfs_ioctl_quota_ctl_args) -#define BTRFS_IOC_QGROUP_ASSIGN _IOW(BTRFS_IOCTL_MAGIC, 41, \ - struct btrfs_ioctl_qgroup_assign_args) -#define BTRFS_IOC_QGROUP_CREATE _IOW(BTRFS_IOCTL_MAGIC, 42, \ - struct btrfs_ioctl_qgroup_create_args) -#define BTRFS_IOC_QGROUP_LIMIT _IOR(BTRFS_IOCTL_MAGIC, 43, \ - struct btrfs_ioctl_qgroup_limit_args) -#define BTRFS_IOC_GET_DEV_STATS _IOWR(BTRFS_IOCTL_MAGIC, 52, \ - struct btrfs_ioctl_get_dev_stats) -#define BTRFS_IOC_DEV_REPLACE _IOWR(BTRFS_IOCTL_MAGIC, 53, \ - struct btrfs_ioctl_dev_replace_args) - -#endif diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index a5c856234323..a0d6368249fa 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -23,13 +23,13 @@ #include #include #include +#include #include "ctree.h" #include "transaction.h" #include "disk-io.h" #include "locking.h" #include "ulist.h" -#include "ioctl.h" #include "backref.h" /* TODO XXX FIXME diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 67b373bf3ff9..6846ededfe95 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -41,13 +41,13 @@ #include #include #include +#include #include "compat.h" #include "delayed-inode.h" #include "ctree.h" #include "disk-io.h" #include "transaction.h" #include "btrfs_inode.h" -#include "ioctl.h" #include "print-tree.h" #include "xattr.h" #include "volumes.h" diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index d3c3939ac751..12bb84166a5f 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -21,8 +21,8 @@ #include #include +#include #include "async-thread.h" -#include "ioctl.h" #define BTRFS_STRIPE_LEN (64 * 1024) diff --git a/include/linux/btrfs.h b/include/linux/btrfs.h new file mode 100644 index 000000000000..22d799147db2 --- /dev/null +++ b/include/linux/btrfs.h @@ -0,0 +1,6 @@ +#ifndef _LINUX_BTRFS_H +#define _LINUX_BTRFS_H + +#include + +#endif /* _LINUX_BTRFS_H */ diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild index 19e765fbfef7..896ee1247294 100644 --- a/include/uapi/linux/Kbuild +++ b/include/uapi/linux/Kbuild @@ -68,6 +68,7 @@ header-y += blkpg.h header-y += blktrace_api.h header-y += bpqether.h header-y += bsg.h +header-y += btrfs.h header-y += can.h header-y += capability.h header-y += capi.h diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h new file mode 100644 index 000000000000..cffbb582dd90 --- /dev/null +++ b/include/uapi/linux/btrfs.h @@ -0,0 +1,503 @@ +/* + * Copyright (C) 2007 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + +#ifndef _UAPI_LINUX_BTRFS_H +#define _UAPI_LINUX_BTRFS_H +#include +#include + +#define BTRFS_IOCTL_MAGIC 0x94 +#define BTRFS_VOL_NAME_MAX 255 + +/* this should be 4k */ +#define BTRFS_PATH_NAME_MAX 4087 +struct btrfs_ioctl_vol_args { + __s64 fd; + char name[BTRFS_PATH_NAME_MAX + 1]; +}; + +#define BTRFS_DEVICE_PATH_NAME_MAX 1024 + +#define BTRFS_SUBVOL_CREATE_ASYNC (1ULL << 0) +#define BTRFS_SUBVOL_RDONLY (1ULL << 1) +#define BTRFS_SUBVOL_QGROUP_INHERIT (1ULL << 2) +#define BTRFS_FSID_SIZE 16 +#define BTRFS_UUID_SIZE 16 + +#define BTRFS_QGROUP_INHERIT_SET_LIMITS (1ULL << 0) + +struct btrfs_qgroup_limit { + __u64 flags; + __u64 max_rfer; + __u64 max_excl; + __u64 rsv_rfer; + __u64 rsv_excl; +}; + +struct btrfs_qgroup_inherit { + __u64 flags; + __u64 num_qgroups; + __u64 num_ref_copies; + __u64 num_excl_copies; + struct btrfs_qgroup_limit lim; + __u64 qgroups[0]; +}; + +struct btrfs_ioctl_qgroup_limit_args { + __u64 qgroupid; + struct btrfs_qgroup_limit lim; +}; + +#define BTRFS_SUBVOL_NAME_MAX 4039 +struct btrfs_ioctl_vol_args_v2 { + __s64 fd; + __u64 transid; + __u64 flags; + union { + struct { + __u64 size; + struct btrfs_qgroup_inherit __user *qgroup_inherit; + }; + __u64 unused[4]; + }; + char name[BTRFS_SUBVOL_NAME_MAX + 1]; +}; + +/* + * structure to report errors and progress to userspace, either as a + * result of a finished scrub, a canceled scrub or a progress inquiry + */ +struct btrfs_scrub_progress { + __u64 data_extents_scrubbed; /* # of data extents scrubbed */ + __u64 tree_extents_scrubbed; /* # of tree extents scrubbed */ + __u64 data_bytes_scrubbed; /* # of data bytes scrubbed */ + __u64 tree_bytes_scrubbed; /* # of tree bytes scrubbed */ + __u64 read_errors; /* # of read errors encountered (EIO) */ + __u64 csum_errors; /* # of failed csum checks */ + __u64 verify_errors; /* # of occurences, where the metadata + * of a tree block did not match the + * expected values, like generation or + * logical */ + __u64 no_csum; /* # of 4k data block for which no csum + * is present, probably the result of + * data written with nodatasum */ + __u64 csum_discards; /* # of csum for which no data was found + * in the extent tree. */ + __u64 super_errors; /* # of bad super blocks encountered */ + __u64 malloc_errors; /* # of internal kmalloc errors. These + * will likely cause an incomplete + * scrub */ + __u64 uncorrectable_errors; /* # of errors where either no intact + * copy was found or the writeback + * failed */ + __u64 corrected_errors; /* # of errors corrected */ + __u64 last_physical; /* last physical address scrubbed. In + * case a scrub was aborted, this can + * be used to restart the scrub */ + __u64 unverified_errors; /* # of occurences where a read for a + * full (64k) bio failed, but the re- + * check succeeded for each 4k piece. + * Intermittent error. */ +}; + +#define BTRFS_SCRUB_READONLY 1 +struct btrfs_ioctl_scrub_args { + __u64 devid; /* in */ + __u64 start; /* in */ + __u64 end; /* in */ + __u64 flags; /* in */ + struct btrfs_scrub_progress progress; /* out */ + /* pad to 1k */ + __u64 unused[(1024-32-sizeof(struct btrfs_scrub_progress))/8]; +}; + +#define BTRFS_IOCTL_DEV_REPLACE_CONT_READING_FROM_SRCDEV_MODE_ALWAYS 0 +#define BTRFS_IOCTL_DEV_REPLACE_CONT_READING_FROM_SRCDEV_MODE_AVOID 1 +struct btrfs_ioctl_dev_replace_start_params { + __u64 srcdevid; /* in, if 0, use srcdev_name instead */ + __u64 cont_reading_from_srcdev_mode; /* in, see #define + * above */ + __u8 srcdev_name[BTRFS_DEVICE_PATH_NAME_MAX + 1]; /* in */ + __u8 tgtdev_name[BTRFS_DEVICE_PATH_NAME_MAX + 1]; /* in */ +}; + +#define BTRFS_IOCTL_DEV_REPLACE_STATE_NEVER_STARTED 0 +#define BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED 1 +#define BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED 2 +#define BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED 3 +#define BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED 4 +struct btrfs_ioctl_dev_replace_status_params { + __u64 replace_state; /* out, see #define above */ + __u64 progress_1000; /* out, 0 <= x <= 1000 */ + __u64 time_started; /* out, seconds since 1-Jan-1970 */ + __u64 time_stopped; /* out, seconds since 1-Jan-1970 */ + __u64 num_write_errors; /* out */ + __u64 num_uncorrectable_read_errors; /* out */ +}; + +#define BTRFS_IOCTL_DEV_REPLACE_CMD_START 0 +#define BTRFS_IOCTL_DEV_REPLACE_CMD_STATUS 1 +#define BTRFS_IOCTL_DEV_REPLACE_CMD_CANCEL 2 +#define BTRFS_IOCTL_DEV_REPLACE_RESULT_NO_ERROR 0 +#define BTRFS_IOCTL_DEV_REPLACE_RESULT_NOT_STARTED 1 +#define BTRFS_IOCTL_DEV_REPLACE_RESULT_ALREADY_STARTED 2 +struct btrfs_ioctl_dev_replace_args { + __u64 cmd; /* in */ + __u64 result; /* out */ + + union { + struct btrfs_ioctl_dev_replace_start_params start; + struct btrfs_ioctl_dev_replace_status_params status; + }; /* in/out */ + + __u64 spare[64]; +}; + +struct btrfs_ioctl_dev_info_args { + __u64 devid; /* in/out */ + __u8 uuid[BTRFS_UUID_SIZE]; /* in/out */ + __u64 bytes_used; /* out */ + __u64 total_bytes; /* out */ + __u64 unused[379]; /* pad to 4k */ + __u8 path[BTRFS_DEVICE_PATH_NAME_MAX]; /* out */ +}; + +struct btrfs_ioctl_fs_info_args { + __u64 max_id; /* out */ + __u64 num_devices; /* out */ + __u8 fsid[BTRFS_FSID_SIZE]; /* out */ + __u64 reserved[124]; /* pad to 1k */ +}; + +/* balance control ioctl modes */ +#define BTRFS_BALANCE_CTL_PAUSE 1 +#define BTRFS_BALANCE_CTL_CANCEL 2 + +/* + * this is packed, because it should be exactly the same as its disk + * byte order counterpart (struct btrfs_disk_balance_args) + */ +struct btrfs_balance_args { + __u64 profiles; + __u64 usage; + __u64 devid; + __u64 pstart; + __u64 pend; + __u64 vstart; + __u64 vend; + + __u64 target; + + __u64 flags; + + __u64 unused[8]; +} __attribute__ ((__packed__)); + +/* report balance progress to userspace */ +struct btrfs_balance_progress { + __u64 expected; /* estimated # of chunks that will be + * relocated to fulfill the request */ + __u64 considered; /* # of chunks we have considered so far */ + __u64 completed; /* # of chunks relocated so far */ +}; + +#define BTRFS_BALANCE_STATE_RUNNING (1ULL << 0) +#define BTRFS_BALANCE_STATE_PAUSE_REQ (1ULL << 1) +#define BTRFS_BALANCE_STATE_CANCEL_REQ (1ULL << 2) + +struct btrfs_ioctl_balance_args { + __u64 flags; /* in/out */ + __u64 state; /* out */ + + struct btrfs_balance_args data; /* in/out */ + struct btrfs_balance_args meta; /* in/out */ + struct btrfs_balance_args sys; /* in/out */ + + struct btrfs_balance_progress stat; /* out */ + + __u64 unused[72]; /* pad to 1k */ +}; + +#define BTRFS_INO_LOOKUP_PATH_MAX 4080 +struct btrfs_ioctl_ino_lookup_args { + __u64 treeid; + __u64 objectid; + char name[BTRFS_INO_LOOKUP_PATH_MAX]; +}; + +struct btrfs_ioctl_search_key { + /* which root are we searching. 0 is the tree of tree roots */ + __u64 tree_id; + + /* keys returned will be >= min and <= max */ + __u64 min_objectid; + __u64 max_objectid; + + /* keys returned will be >= min and <= max */ + __u64 min_offset; + __u64 max_offset; + + /* max and min transids to search for */ + __u64 min_transid; + __u64 max_transid; + + /* keys returned will be >= min and <= max */ + __u32 min_type; + __u32 max_type; + + /* + * how many items did userland ask for, and how many are we + * returning + */ + __u32 nr_items; + + /* align to 64 bits */ + __u32 unused; + + /* some extra for later */ + __u64 unused1; + __u64 unused2; + __u64 unused3; + __u64 unused4; +}; + +struct btrfs_ioctl_search_header { + __u64 transid; + __u64 objectid; + __u64 offset; + __u32 type; + __u32 len; +}; + +#define BTRFS_SEARCH_ARGS_BUFSIZE (4096 - sizeof(struct btrfs_ioctl_search_key)) +/* + * the buf is an array of search headers where + * each header is followed by the actual item + * the type field is expanded to 32 bits for alignment + */ +struct btrfs_ioctl_search_args { + struct btrfs_ioctl_search_key key; + char buf[BTRFS_SEARCH_ARGS_BUFSIZE]; +}; + +struct btrfs_ioctl_clone_range_args { + __s64 src_fd; + __u64 src_offset, src_length; + __u64 dest_offset; +}; + +/* flags for the defrag range ioctl */ +#define BTRFS_DEFRAG_RANGE_COMPRESS 1 +#define BTRFS_DEFRAG_RANGE_START_IO 2 + +struct btrfs_ioctl_space_info { + __u64 flags; + __u64 total_bytes; + __u64 used_bytes; +}; + +struct btrfs_ioctl_space_args { + __u64 space_slots; + __u64 total_spaces; + struct btrfs_ioctl_space_info spaces[0]; +}; + +struct btrfs_data_container { + __u32 bytes_left; /* out -- bytes not needed to deliver output */ + __u32 bytes_missing; /* out -- additional bytes needed for result */ + __u32 elem_cnt; /* out */ + __u32 elem_missed; /* out */ + __u64 val[0]; /* out */ +}; + +struct btrfs_ioctl_ino_path_args { + __u64 inum; /* in */ + __u64 size; /* in */ + __u64 reserved[4]; + /* struct btrfs_data_container *fspath; out */ + __u64 fspath; /* out */ +}; + +struct btrfs_ioctl_logical_ino_args { + __u64 logical; /* in */ + __u64 size; /* in */ + __u64 reserved[4]; + /* struct btrfs_data_container *inodes; out */ + __u64 inodes; +}; + +enum btrfs_dev_stat_values { + /* disk I/O failure stats */ + BTRFS_DEV_STAT_WRITE_ERRS, /* EIO or EREMOTEIO from lower layers */ + BTRFS_DEV_STAT_READ_ERRS, /* EIO or EREMOTEIO from lower layers */ + BTRFS_DEV_STAT_FLUSH_ERRS, /* EIO or EREMOTEIO from lower layers */ + + /* stats for indirect indications for I/O failures */ + BTRFS_DEV_STAT_CORRUPTION_ERRS, /* checksum error, bytenr error or + * contents is illegal: this is an + * indication that the block was damaged + * during read or write, or written to + * wrong location or read from wrong + * location */ + BTRFS_DEV_STAT_GENERATION_ERRS, /* an indication that blocks have not + * been written */ + + BTRFS_DEV_STAT_VALUES_MAX +}; + +/* Reset statistics after reading; needs SYS_ADMIN capability */ +#define BTRFS_DEV_STATS_RESET (1ULL << 0) + +struct btrfs_ioctl_get_dev_stats { + __u64 devid; /* in */ + __u64 nr_items; /* in/out */ + __u64 flags; /* in/out */ + + /* out values: */ + __u64 values[BTRFS_DEV_STAT_VALUES_MAX]; + + __u64 unused[128 - 2 - BTRFS_DEV_STAT_VALUES_MAX]; /* pad to 1k */ +}; + +#define BTRFS_QUOTA_CTL_ENABLE 1 +#define BTRFS_QUOTA_CTL_DISABLE 2 +#define BTRFS_QUOTA_CTL_RESCAN 3 +struct btrfs_ioctl_quota_ctl_args { + __u64 cmd; + __u64 status; +}; + +struct btrfs_ioctl_qgroup_assign_args { + __u64 assign; + __u64 src; + __u64 dst; +}; + +struct btrfs_ioctl_qgroup_create_args { + __u64 create; + __u64 qgroupid; +}; +struct btrfs_ioctl_timespec { + __u64 sec; + __u32 nsec; +}; + +struct btrfs_ioctl_received_subvol_args { + char uuid[BTRFS_UUID_SIZE]; /* in */ + __u64 stransid; /* in */ + __u64 rtransid; /* out */ + struct btrfs_ioctl_timespec stime; /* in */ + struct btrfs_ioctl_timespec rtime; /* out */ + __u64 flags; /* in */ + __u64 reserved[16]; /* in */ +}; + +struct btrfs_ioctl_send_args { + __s64 send_fd; /* in */ + __u64 clone_sources_count; /* in */ + __u64 __user *clone_sources; /* in */ + __u64 parent_root; /* in */ + __u64 flags; /* in */ + __u64 reserved[4]; /* in */ +}; + +#define BTRFS_IOC_SNAP_CREATE _IOW(BTRFS_IOCTL_MAGIC, 1, \ + struct btrfs_ioctl_vol_args) +#define BTRFS_IOC_DEFRAG _IOW(BTRFS_IOCTL_MAGIC, 2, \ + struct btrfs_ioctl_vol_args) +#define BTRFS_IOC_RESIZE _IOW(BTRFS_IOCTL_MAGIC, 3, \ + struct btrfs_ioctl_vol_args) +#define BTRFS_IOC_SCAN_DEV _IOW(BTRFS_IOCTL_MAGIC, 4, \ + struct btrfs_ioctl_vol_args) +/* trans start and trans end are dangerous, and only for + * use by applications that know how to avoid the + * resulting deadlocks + */ +#define BTRFS_IOC_TRANS_START _IO(BTRFS_IOCTL_MAGIC, 6) +#define BTRFS_IOC_TRANS_END _IO(BTRFS_IOCTL_MAGIC, 7) +#define BTRFS_IOC_SYNC _IO(BTRFS_IOCTL_MAGIC, 8) + +#define BTRFS_IOC_CLONE _IOW(BTRFS_IOCTL_MAGIC, 9, int) +#define BTRFS_IOC_ADD_DEV _IOW(BTRFS_IOCTL_MAGIC, 10, \ + struct btrfs_ioctl_vol_args) +#define BTRFS_IOC_RM_DEV _IOW(BTRFS_IOCTL_MAGIC, 11, \ + struct btrfs_ioctl_vol_args) +#define BTRFS_IOC_BALANCE _IOW(BTRFS_IOCTL_MAGIC, 12, \ + struct btrfs_ioctl_vol_args) + +#define BTRFS_IOC_CLONE_RANGE _IOW(BTRFS_IOCTL_MAGIC, 13, \ + struct btrfs_ioctl_clone_range_args) + +#define BTRFS_IOC_SUBVOL_CREATE _IOW(BTRFS_IOCTL_MAGIC, 14, \ + struct btrfs_ioctl_vol_args) +#define BTRFS_IOC_SNAP_DESTROY _IOW(BTRFS_IOCTL_MAGIC, 15, \ + struct btrfs_ioctl_vol_args) +#define BTRFS_IOC_DEFRAG_RANGE _IOW(BTRFS_IOCTL_MAGIC, 16, \ + struct btrfs_ioctl_defrag_range_args) +#define BTRFS_IOC_TREE_SEARCH _IOWR(BTRFS_IOCTL_MAGIC, 17, \ + struct btrfs_ioctl_search_args) +#define BTRFS_IOC_INO_LOOKUP _IOWR(BTRFS_IOCTL_MAGIC, 18, \ + struct btrfs_ioctl_ino_lookup_args) +#define BTRFS_IOC_DEFAULT_SUBVOL _IOW(BTRFS_IOCTL_MAGIC, 19, u64) +#define BTRFS_IOC_SPACE_INFO _IOWR(BTRFS_IOCTL_MAGIC, 20, \ + struct btrfs_ioctl_space_args) +#define BTRFS_IOC_START_SYNC _IOR(BTRFS_IOCTL_MAGIC, 24, __u64) +#define BTRFS_IOC_WAIT_SYNC _IOW(BTRFS_IOCTL_MAGIC, 22, __u64) +#define BTRFS_IOC_SNAP_CREATE_V2 _IOW(BTRFS_IOCTL_MAGIC, 23, \ + struct btrfs_ioctl_vol_args_v2) +#define BTRFS_IOC_SUBVOL_CREATE_V2 _IOW(BTRFS_IOCTL_MAGIC, 24, \ + struct btrfs_ioctl_vol_args_v2) +#define BTRFS_IOC_SUBVOL_GETFLAGS _IOR(BTRFS_IOCTL_MAGIC, 25, __u64) +#define BTRFS_IOC_SUBVOL_SETFLAGS _IOW(BTRFS_IOCTL_MAGIC, 26, __u64) +#define BTRFS_IOC_SCRUB _IOWR(BTRFS_IOCTL_MAGIC, 27, \ + struct btrfs_ioctl_scrub_args) +#define BTRFS_IOC_SCRUB_CANCEL _IO(BTRFS_IOCTL_MAGIC, 28) +#define BTRFS_IOC_SCRUB_PROGRESS _IOWR(BTRFS_IOCTL_MAGIC, 29, \ + struct btrfs_ioctl_scrub_args) +#define BTRFS_IOC_DEV_INFO _IOWR(BTRFS_IOCTL_MAGIC, 30, \ + struct btrfs_ioctl_dev_info_args) +#define BTRFS_IOC_FS_INFO _IOR(BTRFS_IOCTL_MAGIC, 31, \ + struct btrfs_ioctl_fs_info_args) +#define BTRFS_IOC_BALANCE_V2 _IOWR(BTRFS_IOCTL_MAGIC, 32, \ + struct btrfs_ioctl_balance_args) +#define BTRFS_IOC_BALANCE_CTL _IOW(BTRFS_IOCTL_MAGIC, 33, int) +#define BTRFS_IOC_BALANCE_PROGRESS _IOR(BTRFS_IOCTL_MAGIC, 34, \ + struct btrfs_ioctl_balance_args) +#define BTRFS_IOC_INO_PATHS _IOWR(BTRFS_IOCTL_MAGIC, 35, \ + struct btrfs_ioctl_ino_path_args) +#define BTRFS_IOC_LOGICAL_INO _IOWR(BTRFS_IOCTL_MAGIC, 36, \ + struct btrfs_ioctl_ino_path_args) +#define BTRFS_IOC_SET_RECEIVED_SUBVOL _IOWR(BTRFS_IOCTL_MAGIC, 37, \ + struct btrfs_ioctl_received_subvol_args) +#define BTRFS_IOC_SEND _IOW(BTRFS_IOCTL_MAGIC, 38, struct btrfs_ioctl_send_args) +#define BTRFS_IOC_DEVICES_READY _IOR(BTRFS_IOCTL_MAGIC, 39, \ + struct btrfs_ioctl_vol_args) +#define BTRFS_IOC_QUOTA_CTL _IOWR(BTRFS_IOCTL_MAGIC, 40, \ + struct btrfs_ioctl_quota_ctl_args) +#define BTRFS_IOC_QGROUP_ASSIGN _IOW(BTRFS_IOCTL_MAGIC, 41, \ + struct btrfs_ioctl_qgroup_assign_args) +#define BTRFS_IOC_QGROUP_CREATE _IOW(BTRFS_IOCTL_MAGIC, 42, \ + struct btrfs_ioctl_qgroup_create_args) +#define BTRFS_IOC_QGROUP_LIMIT _IOR(BTRFS_IOCTL_MAGIC, 43, \ + struct btrfs_ioctl_qgroup_limit_args) +#define BTRFS_IOC_GET_DEV_STATS _IOWR(BTRFS_IOCTL_MAGIC, 52, \ + struct btrfs_ioctl_get_dev_stats) +#define BTRFS_IOC_DEV_REPLACE _IOWR(BTRFS_IOCTL_MAGIC, 53, \ + struct btrfs_ioctl_dev_replace_args) + +#endif /* _UAPI_LINUX_BTRFS_H */ -- cgit v1.2.3 From cb95e7bf7ba481c3d35b238b1cd671b63f54238a Mon Sep 17 00:00:00 2001 From: Mark Fasheh Date: Mon, 4 Feb 2013 20:54:57 +0000 Subject: btrfs: add "no file data" flag to btrfs send ioctl This patch adds the flag, BTRFS_SEND_FLAG_NO_FILE_DATA to the btrfs send ioctl code. When this flag is set, the btrfs send code will never write file data into the stream (thus also avoiding expensive reads of that data in the first place). BTRFS_SEND_C_UPDATE_EXTENT commands will be sent (instead of BTRFS_SEND_C_WRITE) with an offset, length pair indicating the extent in question. This patch does not affect the operation of BTRFS_SEND_C_CLONE commands - they will continue to be sent when a search finds an appropriate extent to clone from. Signed-off-by: Mark Fasheh Signed-off-by: Josef Bacik --- fs/btrfs/send.c | 50 ++++++++++++++++++++++++++++++++++++++++++---- fs/btrfs/send.h | 1 + include/uapi/linux/btrfs.h | 7 +++++++ 3 files changed, 54 insertions(+), 4 deletions(-) (limited to 'include/uapi/linux') diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index 614da0d44d56..68da757615ae 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -85,6 +85,7 @@ struct send_ctx { u32 send_max_size; u64 total_send_size; u64 cmd_send_size[BTRFS_SEND_C_MAX + 1]; + u64 flags; /* 'flags' member of btrfs_ioctl_send_args is u64 */ struct vfsmount *mnt; @@ -3709,6 +3710,39 @@ out: return ret; } +/* + * Send an update extent command to user space. + */ +static int send_update_extent(struct send_ctx *sctx, + u64 offset, u32 len) +{ + int ret = 0; + struct fs_path *p; + + p = fs_path_alloc(sctx); + if (!p) + return -ENOMEM; + + ret = begin_cmd(sctx, BTRFS_SEND_C_UPDATE_EXTENT); + if (ret < 0) + goto out; + + ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p); + if (ret < 0) + goto out; + + TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p); + TLV_PUT_U64(sctx, BTRFS_SEND_A_FILE_OFFSET, offset); + TLV_PUT_U64(sctx, BTRFS_SEND_A_SIZE, len); + + ret = send_cmd(sctx); + +tlv_put_failure: +out: + fs_path_free(sctx, p); + return ret; +} + static int send_write_or_clone(struct send_ctx *sctx, struct btrfs_path *path, struct btrfs_key *key, @@ -3744,7 +3778,11 @@ static int send_write_or_clone(struct send_ctx *sctx, goto out; } - if (!clone_root) { + if (clone_root) { + ret = send_clone(sctx, offset, len, clone_root); + } else if (sctx->flags & BTRFS_SEND_FLAG_NO_FILE_DATA) { + ret = send_update_extent(sctx, offset, len); + } else { while (pos < len) { l = len - pos; if (l > BTRFS_SEND_READ_SIZE) @@ -3757,10 +3795,7 @@ static int send_write_or_clone(struct send_ctx *sctx, pos += ret; } ret = 0; - } else { - ret = send_clone(sctx, offset, len, clone_root); } - out: return ret; } @@ -4560,6 +4595,11 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_) goto out; } + if (arg->flags & ~BTRFS_SEND_FLAG_NO_FILE_DATA) { + ret = -EINVAL; + goto out; + } + sctx = kzalloc(sizeof(struct send_ctx), GFP_NOFS); if (!sctx) { ret = -ENOMEM; @@ -4571,6 +4611,8 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_) INIT_RADIX_TREE(&sctx->name_cache, GFP_NOFS); INIT_LIST_HEAD(&sctx->name_cache_list); + sctx->flags = arg->flags; + sctx->send_filp = fget(arg->send_fd); if (IS_ERR(sctx->send_filp)) { ret = PTR_ERR(sctx->send_filp); diff --git a/fs/btrfs/send.h b/fs/btrfs/send.h index 1bf4f32fd4ef..8bb18f7ccaa6 100644 --- a/fs/btrfs/send.h +++ b/fs/btrfs/send.h @@ -86,6 +86,7 @@ enum btrfs_send_cmd { BTRFS_SEND_C_UTIMES, BTRFS_SEND_C_END, + BTRFS_SEND_C_UPDATE_EXTENT, __BTRFS_SEND_C_MAX, }; #define BTRFS_SEND_C_MAX (__BTRFS_SEND_C_MAX - 1) diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h index cffbb582dd90..dd9f1293ab35 100644 --- a/include/uapi/linux/btrfs.h +++ b/include/uapi/linux/btrfs.h @@ -407,6 +407,13 @@ struct btrfs_ioctl_received_subvol_args { __u64 reserved[16]; /* in */ }; +/* + * Caller doesn't want file data in the send stream, even if the + * search of clone sources doesn't find an extent. UPDATE_EXTENT + * commands will be sent instead of WRITE commands. + */ +#define BTRFS_SEND_FLAG_NO_FILE_DATA 0x1 + struct btrfs_ioctl_send_args { __s64 send_fd; /* in */ __u64 clone_sources_count; /* in */ -- cgit v1.2.3 From 867ab667e74377160c4a683375ee5b8bf8801724 Mon Sep 17 00:00:00 2001 From: "jeff.liu" Date: Sat, 5 Jan 2013 02:48:01 +0000 Subject: Btrfs: Add a new ioctl to get the label of a mounted file system Add a new ioctl(2) BTRFS_IOC_GET_FSLABLE, so that we can get the label upon a mounted filesystem. Signed-off-by: Jie Liu Signed-off-by: Anand Jain Cc: Miao Xie Cc: Goffredo Baroncelli Cc: David Sterba Signed-off-by: Josef Bacik --- fs/btrfs/ioctl.c | 21 +++++++++++++++++++++ include/uapi/linux/btrfs.h | 2 ++ 2 files changed, 23 insertions(+) (limited to 'include/uapi/linux') diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index d02ec577f70f..fcc15a6804a9 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -3917,6 +3917,25 @@ out: return ret; } +static int btrfs_ioctl_get_fslabel(struct file *file, void __user *arg) +{ + struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root; + const char *label = root->fs_info->super_copy->label; + size_t len = strnlen(label, BTRFS_LABEL_SIZE); + int ret; + + if (len == BTRFS_LABEL_SIZE) { + pr_warn("btrfs: label is too long, return the first %zu bytes\n", + --len); + } + + mutex_lock(&root->fs_info->volume_mutex); + ret = copy_to_user(arg, label, len); + mutex_unlock(&root->fs_info->volume_mutex); + + return ret ? -EFAULT : 0; +} + long btrfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { @@ -4017,6 +4036,8 @@ long btrfs_ioctl(struct file *file, unsigned int return btrfs_ioctl_qgroup_limit(file, argp); case BTRFS_IOC_DEV_REPLACE: return btrfs_ioctl_dev_replace(root, argp); + case BTRFS_IOC_GET_FSLABEL: + return btrfs_ioctl_get_fslabel(file, argp); } return -ENOTTY; diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h index dd9f1293ab35..51c0b335e0c8 100644 --- a/include/uapi/linux/btrfs.h +++ b/include/uapi/linux/btrfs.h @@ -502,6 +502,8 @@ struct btrfs_ioctl_send_args { struct btrfs_ioctl_qgroup_create_args) #define BTRFS_IOC_QGROUP_LIMIT _IOR(BTRFS_IOCTL_MAGIC, 43, \ struct btrfs_ioctl_qgroup_limit_args) +#define BTRFS_IOC_GET_FSLABEL _IOR(BTRFS_IOCTL_MAGIC, 49, \ + char[BTRFS_LABEL_SIZE]) #define BTRFS_IOC_GET_DEV_STATS _IOWR(BTRFS_IOCTL_MAGIC, 52, \ struct btrfs_ioctl_get_dev_stats) #define BTRFS_IOC_DEV_REPLACE _IOWR(BTRFS_IOCTL_MAGIC, 53, \ -- cgit v1.2.3 From a8bfd4abea3da0e28f215e2a2b8c2f1ca27ebe80 Mon Sep 17 00:00:00 2001 From: "jeff.liu" Date: Sat, 5 Jan 2013 02:48:08 +0000 Subject: Btrfs: set/change the label of a mounted file system With this new ioctl(2) BTRFS_IOC_SET_FSLABEL, we can set/change the label of a mounted file system. Signed-off-by: Jie Liu Signed-off-by: Anand Jain Reviewed-by: Miao Xie Reviewed-by: Goffredo Baroncelli Reviewed-by: David Sterba Reviewed-by: Goffredo Baroncelli Signed-off-by: Josef Bacik --- fs/btrfs/ioctl.c | 42 ++++++++++++++++++++++++++++++++++++++++++ include/uapi/linux/btrfs.h | 2 ++ 2 files changed, 44 insertions(+) (limited to 'include/uapi/linux') diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index fcc15a6804a9..0f68729f261e 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -3936,6 +3936,46 @@ static int btrfs_ioctl_get_fslabel(struct file *file, void __user *arg) return ret ? -EFAULT : 0; } +static int btrfs_ioctl_set_fslabel(struct file *file, void __user *arg) +{ + struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root; + struct btrfs_super_block *super_block = root->fs_info->super_copy; + struct btrfs_trans_handle *trans; + char label[BTRFS_LABEL_SIZE]; + int ret; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if (copy_from_user(label, arg, sizeof(label))) + return -EFAULT; + + if (strnlen(label, BTRFS_LABEL_SIZE) == BTRFS_LABEL_SIZE) { + pr_err("btrfs: unable to set label with more than %d bytes\n", + BTRFS_LABEL_SIZE - 1); + return -EINVAL; + } + + ret = mnt_want_write_file(file); + if (ret) + return ret; + + mutex_lock(&root->fs_info->volume_mutex); + trans = btrfs_start_transaction(root, 0); + if (IS_ERR(trans)) { + ret = PTR_ERR(trans); + goto out_unlock; + } + + strcpy(super_block->label, label); + ret = btrfs_end_transaction(trans, root); + +out_unlock: + mutex_unlock(&root->fs_info->volume_mutex); + mnt_drop_write_file(file); + return ret; +} + long btrfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { @@ -4038,6 +4078,8 @@ long btrfs_ioctl(struct file *file, unsigned int return btrfs_ioctl_dev_replace(root, argp); case BTRFS_IOC_GET_FSLABEL: return btrfs_ioctl_get_fslabel(file, argp); + case BTRFS_IOC_SET_FSLABEL: + return btrfs_ioctl_set_fslabel(file, argp); } return -ENOTTY; diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h index 51c0b335e0c8..fa3a5f9338fc 100644 --- a/include/uapi/linux/btrfs.h +++ b/include/uapi/linux/btrfs.h @@ -504,6 +504,8 @@ struct btrfs_ioctl_send_args { struct btrfs_ioctl_qgroup_limit_args) #define BTRFS_IOC_GET_FSLABEL _IOR(BTRFS_IOCTL_MAGIC, 49, \ char[BTRFS_LABEL_SIZE]) +#define BTRFS_IOC_SET_FSLABEL _IOW(BTRFS_IOCTL_MAGIC, 50, \ + char[BTRFS_LABEL_SIZE]) #define BTRFS_IOC_GET_DEV_STATS _IOWR(BTRFS_IOCTL_MAGIC, 52, \ struct btrfs_ioctl_get_dev_stats) #define BTRFS_IOC_DEV_REPLACE _IOWR(BTRFS_IOCTL_MAGIC, 53, \ -- cgit v1.2.3 From b531f81b0d70ffbe8d70500512483227cc532608 Mon Sep 17 00:00:00 2001 From: Pawel Moll Date: Thu, 21 Feb 2013 01:55:50 +0000 Subject: ALSA: usb: Fix Processing Unit Descriptor parsers Commit 99fc86450c439039d2ef88d06b222fd51a779176 "ALSA: usb-mixer: parse descriptors with structs" introduced a set of useful parsers for descriptors. Unfortunately the parses for the Processing Unit Descriptor came with a very subtle bug... Functions uac_processing_unit_iProcessing() and uac_processing_unit_specific() were indexing the baSourceID array forgetting the fields before the iProcessing and process-specific descriptors. The problem was observed with Sound Blaster Extigy mixer, where nNrModes in Up/Down-mix Processing Unit Descriptor was accessed at offset 10 of the descriptor (value 0) instead of offset 15 (value 7). In result the resulting control had interesting limit values: Simple mixer control 'Channel Routing Mode Select',0 Capabilities: volume volume-joined penum Playback channels: Mono Capture channels: Mono Limits: 0 - -1 Mono: -1 [100%] Fixed by starting from the bmControls, which was calculated correctly, instead of baSourceID. Now the mentioned control is fine: Simple mixer control 'Channel Routing Mode Select',0 Capabilities: volume volume-joined penum Playback channels: Mono Capture channels: Mono Limits: 0 - 6 Mono: 0 [0%] Signed-off-by: Pawel Moll Cc: Signed-off-by: Takashi Iwai --- include/uapi/linux/usb/audio.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/usb/audio.h b/include/uapi/linux/usb/audio.h index ac90037894d9..d2314be4f0c0 100644 --- a/include/uapi/linux/usb/audio.h +++ b/include/uapi/linux/usb/audio.h @@ -384,14 +384,16 @@ static inline __u8 uac_processing_unit_iProcessing(struct uac_processing_unit_de int protocol) { __u8 control_size = uac_processing_unit_bControlSize(desc, protocol); - return desc->baSourceID[desc->bNrInPins + control_size]; + return *(uac_processing_unit_bmControls(desc, protocol) + + control_size); } static inline __u8 *uac_processing_unit_specific(struct uac_processing_unit_descriptor *desc, int protocol) { __u8 control_size = uac_processing_unit_bControlSize(desc, protocol); - return &desc->baSourceID[desc->bNrInPins + control_size + 1]; + return uac_processing_unit_bmControls(desc, protocol) + + control_size + 1; } /* 4.5.2 Class-Specific AS Interface Descriptor */ -- cgit v1.2.3 From ffecfd1a72fccfcee3dabb99b9ecba9735318f90 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Thu, 21 Feb 2013 16:42:55 -0800 Subject: block: optionally snapshot page contents to provide stable pages during write This provides a band-aid to provide stable page writes on jbd without needing to backport the fixed locking and page writeback bit handling schemes of jbd2. The band-aid works by using bounce buffers to snapshot page contents instead of waiting. For those wondering about the ext3 bandage -- fixing the jbd locking (which was done as part of ext4dev years ago) is a lot of surgery, and setting PG_writeback on data pages when we actually hold the page lock dropped ext3 performance by nearly an order of magnitude. If we're going to migrate iscsi and raid to use stable page writes, the complaints about high latency will likely return. We might as well centralize their page snapshotting thing to one place. Signed-off-by: Darrick J. Wong Tested-by: Andy Lutomirski Cc: Adrian Hunter Cc: Artem Bityutskiy Reviewed-by: Jan Kara Cc: Joel Becker Cc: Mark Fasheh Cc: Steven Whitehouse Cc: Jens Axboe Cc: Eric Van Hensbergen Cc: Ron Minnich Cc: Latchesar Ionkov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/tile/Kconfig | 6 ------ block/blk-core.c | 8 +++++--- fs/ext3/super.c | 1 + include/uapi/linux/fs.h | 3 +++ mm/Kconfig | 13 +++++++++++++ mm/bounce.c | 48 ++++++++++++++++++++++++++++++++++++++++++++---- mm/page-writeback.c | 4 ++++ 7 files changed, 70 insertions(+), 13 deletions(-) (limited to 'include/uapi/linux') diff --git a/arch/tile/Kconfig b/arch/tile/Kconfig index 1bb7ad4aeff4..b1e68f52029c 100644 --- a/arch/tile/Kconfig +++ b/arch/tile/Kconfig @@ -412,12 +412,6 @@ config TILE_USB Provides USB host adapter support for the built-in EHCI and OHCI interfaces on TILE-Gx chips. -# USB OHCI needs the bounce pool since tilegx will often have more -# than 4GB of memory, but we don't currently use the IOTLB to present -# a 32-bit address to OHCI. So we need to use a bounce pool instead. -config NEED_BOUNCE_POOL - def_bool USB_OHCI_HCD - source "drivers/pci/hotplug/Kconfig" endmenu diff --git a/block/blk-core.c b/block/blk-core.c index c973249d68cd..277134cb5d32 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1474,6 +1474,11 @@ void blk_queue_bio(struct request_queue *q, struct bio *bio) */ blk_queue_bounce(q, &bio); + if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) { + bio_endio(bio, -EIO); + return; + } + if (bio->bi_rw & (REQ_FLUSH | REQ_FUA)) { spin_lock_irq(q->queue_lock); where = ELEVATOR_INSERT_FLUSH; @@ -1714,9 +1719,6 @@ generic_make_request_checks(struct bio *bio) */ blk_partition_remap(bio); - if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) - goto end_io; - if (bio_check_eod(bio, nr_sectors)) goto end_io; diff --git a/fs/ext3/super.c b/fs/ext3/super.c index 6e50223b3299..4ba2683c1d44 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c @@ -2065,6 +2065,7 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent) test_opt(sb,DATA_FLAGS) == EXT3_MOUNT_JOURNAL_DATA ? "journal": test_opt(sb,DATA_FLAGS) == EXT3_MOUNT_ORDERED_DATA ? "ordered": "writeback"); + sb->s_flags |= MS_SNAP_STABLE; return 0; diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h index 780d4c6093eb..c7fc1e6517c3 100644 --- a/include/uapi/linux/fs.h +++ b/include/uapi/linux/fs.h @@ -86,6 +86,9 @@ struct inodes_stat_t { #define MS_KERNMOUNT (1<<22) /* this is a kern_mount call */ #define MS_I_VERSION (1<<23) /* Update inode I_version field */ #define MS_STRICTATIME (1<<24) /* Always perform atime updates */ + +/* These sb flags are internal to the kernel */ +#define MS_SNAP_STABLE (1<<27) /* Snapshot pages during writeback, if needed */ #define MS_NOSEC (1<<28) #define MS_BORN (1<<29) #define MS_ACTIVE (1<<30) diff --git a/mm/Kconfig b/mm/Kconfig index 278e3ab1f169..7901d839aab2 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -258,6 +258,19 @@ config BOUNCE def_bool y depends on BLOCK && MMU && (ZONE_DMA || HIGHMEM) +# On the 'tile' arch, USB OHCI needs the bounce pool since tilegx will often +# have more than 4GB of memory, but we don't currently use the IOTLB to present +# a 32-bit address to OHCI. So we need to use a bounce pool instead. +# +# We also use the bounce pool to provide stable page writes for jbd. jbd +# initiates buffer writeback without locking the page or setting PG_writeback, +# and fixing that behavior (a second time; jbd2 doesn't have this problem) is +# a major rework effort. Instead, use the bounce buffer to snapshot pages +# (until jbd goes away). The only jbd user is ext3. +config NEED_BOUNCE_POOL + bool + default y if (TILE && USB_OHCI_HCD) || (BLK_DEV_INTEGRITY && JBD) + config NR_QUICK int depends on QUICKLIST diff --git a/mm/bounce.c b/mm/bounce.c index 042086775561..5f8901768602 100644 --- a/mm/bounce.c +++ b/mm/bounce.c @@ -178,8 +178,45 @@ static void bounce_end_io_read_isa(struct bio *bio, int err) __bounce_end_io_read(bio, isa_page_pool, err); } +#ifdef CONFIG_NEED_BOUNCE_POOL +static int must_snapshot_stable_pages(struct request_queue *q, struct bio *bio) +{ + struct page *page; + struct backing_dev_info *bdi; + struct address_space *mapping; + struct bio_vec *from; + int i; + + if (bio_data_dir(bio) != WRITE) + return 0; + + if (!bdi_cap_stable_pages_required(&q->backing_dev_info)) + return 0; + + /* + * Based on the first page that has a valid mapping, decide whether or + * not we have to employ bounce buffering to guarantee stable pages. + */ + bio_for_each_segment(from, bio, i) { + page = from->bv_page; + mapping = page_mapping(page); + if (!mapping) + continue; + bdi = mapping->backing_dev_info; + return mapping->host->i_sb->s_flags & MS_SNAP_STABLE; + } + + return 0; +} +#else +static int must_snapshot_stable_pages(struct request_queue *q, struct bio *bio) +{ + return 0; +} +#endif /* CONFIG_NEED_BOUNCE_POOL */ + static void __blk_queue_bounce(struct request_queue *q, struct bio **bio_orig, - mempool_t *pool) + mempool_t *pool, int force) { struct page *page; struct bio *bio = NULL; @@ -192,7 +229,7 @@ static void __blk_queue_bounce(struct request_queue *q, struct bio **bio_orig, /* * is destination page below bounce pfn? */ - if (page_to_pfn(page) <= queue_bounce_pfn(q)) + if (page_to_pfn(page) <= queue_bounce_pfn(q) && !force) continue; /* @@ -270,6 +307,7 @@ static void __blk_queue_bounce(struct request_queue *q, struct bio **bio_orig, void blk_queue_bounce(struct request_queue *q, struct bio **bio_orig) { + int must_bounce; mempool_t *pool; /* @@ -278,13 +316,15 @@ void blk_queue_bounce(struct request_queue *q, struct bio **bio_orig) if (!bio_has_data(*bio_orig)) return; + must_bounce = must_snapshot_stable_pages(q, *bio_orig); + /* * for non-isa bounce case, just check if the bounce pfn is equal * to or bigger than the highest pfn in the system -- in that case, * don't waste time iterating over bio segments */ if (!(q->bounce_gfp & GFP_DMA)) { - if (queue_bounce_pfn(q) >= blk_max_pfn) + if (queue_bounce_pfn(q) >= blk_max_pfn && !must_bounce) return; pool = page_pool; } else { @@ -295,7 +335,7 @@ void blk_queue_bounce(struct request_queue *q, struct bio **bio_orig) /* * slow path */ - __blk_queue_bounce(q, bio_orig, pool); + __blk_queue_bounce(q, bio_orig, pool, must_bounce); } EXPORT_SYMBOL(blk_queue_bounce); diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 355d5ee69058..7300c9d5e1d9 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -2306,6 +2306,10 @@ void wait_for_stable_page(struct page *page) if (!bdi_cap_stable_pages_required(bdi)) return; +#ifdef CONFIG_NEED_BOUNCE_POOL + if (mapping->host->i_sb->s_flags & MS_SNAP_STABLE) + return; +#endif /* CONFIG_NEED_BOUNCE_POOL */ wait_on_page_writeback(page); } -- cgit v1.2.3 From 242260fb858e99674289484bc2bfe3b41f9c4cbb Mon Sep 17 00:00:00 2001 From: Christian Kujau Date: Thu, 21 Feb 2013 16:43:05 -0800 Subject: sun.com documentation fixes After I came across a help text for SUNGEM mentioning a broken sun.com URL, I felt like fixing those up, as they are now pointing to oracle.com URLs. Signed-off-by: Christian Kujau Acked-by: David S. Miller Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/devicetree/booting-without-of.txt | 2 +- drivers/net/ethernet/sun/Kconfig | 4 ++-- include/uapi/linux/elf.h | 12 ++++++------ net/ipv6/Kconfig | 2 +- 4 files changed, 10 insertions(+), 10 deletions(-) (limited to 'include/uapi/linux') diff --git a/Documentation/devicetree/booting-without-of.txt b/Documentation/devicetree/booting-without-of.txt index d4d66757354e..b2fb2f5e1922 100644 --- a/Documentation/devicetree/booting-without-of.txt +++ b/Documentation/devicetree/booting-without-of.txt @@ -1228,7 +1228,7 @@ hierarchy and routing of interrupts in the hardware. The interrupt tree model is fully described in the document "Open Firmware Recommended Practice: Interrupt Mapping Version 0.9". The document is available at: -. + 1) interrupts property ---------------------- diff --git a/drivers/net/ethernet/sun/Kconfig b/drivers/net/ethernet/sun/Kconfig index 57bfd8599679..208c39dedc2e 100644 --- a/drivers/net/ethernet/sun/Kconfig +++ b/drivers/net/ethernet/sun/Kconfig @@ -61,7 +61,7 @@ config SUNGEM select SUNGEM_PHY ---help--- Support for the Sun GEM chip, aka Sun GigabitEthernet/P 2.0. See also - . + . config CASSINI tristate "Sun Cassini support" @@ -69,7 +69,7 @@ config CASSINI select CRC32 ---help--- Support for the Sun Cassini chip, aka Sun GigaSwift Ethernet. See also - + . config SUNVNET tristate "Sun Virtual Network support" diff --git a/include/uapi/linux/elf.h b/include/uapi/linux/elf.h index 126a8175e3e2..900b9484445b 100644 --- a/include/uapi/linux/elf.h +++ b/include/uapi/linux/elf.h @@ -49,14 +49,14 @@ typedef __s64 Elf64_Sxword; * * Specifications are available in: * - * - Sun microsystems: Linker and Libraries. - * Part No: 817-1984-17, September 2008. - * URL: http://docs.sun.com/app/docs/doc/817-1984 + * - Oracle: Linker and Libraries. + * Part No: 817–1984–19, August 2011. + * http://docs.oracle.com/cd/E18752_01/pdf/817-1984.pdf * * - System V ABI AMD64 Architecture Processor Supplement - * Draft Version 0.99., - * May 11, 2009. - * URL: http://www.x86-64.org/ + * Draft Version 0.99.4, + * January 13, 2010. + * http://www.cs.washington.edu/education/courses/cse351/12wi/supp-docs/abi.pdf */ #define PN_XNUM 0xffff diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig index 4f7fe7270e37..a2246afc0007 100644 --- a/net/ipv6/Kconfig +++ b/net/ipv6/Kconfig @@ -11,7 +11,7 @@ menuconfig IPV6 You will still be able to do traditional IPv4 networking as well. For general information about IPv6, see - . + . For Linux IPv6 development information, see . For specific information about IPv6 under Linux, read the HOWTO at . -- cgit v1.2.3 From 5841ca09b35df4ecb0fee4e8fbd21ef177509a71 Mon Sep 17 00:00:00 2001 From: Vyacheslav Dubeyko Date: Wed, 27 Feb 2013 17:02:59 -0800 Subject: hfsplus: add osx.* prefix for handling namespace of Mac OS X extended attributes hfsplus: reworked support of extended attributes. Current mainline implementation of hfsplus file system driver treats as extended attributes only two fields (fdType and fdCreator) of user_info field in file description record (struct hfsplus_cat_file). It is possible to get or set only these two fields as extended attributes. But HFS+ treats as com.apple.FinderInfo extended attribute an union of user_info and finder_info fields as for file (struct hfsplus_cat_file) as for folder (struct hfsplus_cat_folder). Moreover, current mainline implementation of hfsplus file system driver doesn't support special metadata file - attributes tree. Mac OS X 10.4 and later support extended attributes by making use of the HFS+ filesystem Attributes file B*-tree feature which allows for named forks. Mac OS X supports only inline extended attributes, limiting their size to 3802 bytes. Any regular file may have a list of extended attributes. HFS+ supports an arbitrary number of named forks. Each attribute is denoted by a name and the associated data. The name is a null-terminated Unicode string. It is possible to list, to get, to set, and to remove extended attributes from files or directories. It exists some peculiarity during getting of extended attributes list by means of getfattr utility. The getfattr utility expects prefix "user." before any extended attribute's name. So, it ignores any names that don't contained such prefix. Such behavior of getfattr utility results in unexpected empty output of extended attributes list even in the case when file (or folder) contains extended attributes. It needs to use empty string as regular expression pattern for names matching (getfattr --match=""). For support of extended attributes in HFS+: 1. It was added necessary on-disk layout declarations related to Attributes tree into hfsplus_raw.h file. 2. It was added attributes.c file with implementation of functionality of manipulation by records in Attributes tree. 3. It was reworked hfsplus_listxattr, hfsplus_getxattr, hfsplus_setxattr functions in ioctl.c. Moreover, it was added hfsplus_removexattr method. This patch: Add osx.* prefix for handling namespace of Mac OS X extended attributes. [akpm@linux-foundation.org: checkpatch fixes] Signed-off-by: Vyacheslav Dubeyko Reported-by: Hin-Tak Leung Cc: Al Viro Cc: Christoph Hellwig Cc: Jan Kara Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/uapi/linux/xattr.h | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/xattr.h b/include/uapi/linux/xattr.h index 26607bd965fa..e4629b93bdd6 100644 --- a/include/uapi/linux/xattr.h +++ b/include/uapi/linux/xattr.h @@ -15,19 +15,22 @@ /* Namespaces */ #define XATTR_OS2_PREFIX "os2." -#define XATTR_OS2_PREFIX_LEN (sizeof (XATTR_OS2_PREFIX) - 1) +#define XATTR_OS2_PREFIX_LEN (sizeof(XATTR_OS2_PREFIX) - 1) + +#define XATTR_MAC_OSX_PREFIX "osx." +#define XATTR_MAC_OSX_PREFIX_LEN (sizeof(XATTR_MAC_OSX_PREFIX) - 1) #define XATTR_SECURITY_PREFIX "security." -#define XATTR_SECURITY_PREFIX_LEN (sizeof (XATTR_SECURITY_PREFIX) - 1) +#define XATTR_SECURITY_PREFIX_LEN (sizeof(XATTR_SECURITY_PREFIX) - 1) #define XATTR_SYSTEM_PREFIX "system." -#define XATTR_SYSTEM_PREFIX_LEN (sizeof (XATTR_SYSTEM_PREFIX) - 1) +#define XATTR_SYSTEM_PREFIX_LEN (sizeof(XATTR_SYSTEM_PREFIX) - 1) #define XATTR_TRUSTED_PREFIX "trusted." -#define XATTR_TRUSTED_PREFIX_LEN (sizeof (XATTR_TRUSTED_PREFIX) - 1) +#define XATTR_TRUSTED_PREFIX_LEN (sizeof(XATTR_TRUSTED_PREFIX) - 1) #define XATTR_USER_PREFIX "user." -#define XATTR_USER_PREFIX_LEN (sizeof (XATTR_USER_PREFIX) - 1) +#define XATTR_USER_PREFIX_LEN (sizeof(XATTR_USER_PREFIX) - 1) /* Security namespace */ #define XATTR_EVM_SUFFIX "evm" -- cgit v1.2.3 From 6b46419b0462ae565880f02e9cd0baf9b25ea71f Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Wed, 27 Feb 2013 17:03:07 -0800 Subject: fat: add extended fileds to struct fat_boot_sector Later we will need "state" field to check if volume was cleanly unmounted. Signed-off-by: Oleksij Rempel Signed-off-by: OGAWA Hirofumi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/fat/inode.c | 8 ++++---- include/uapi/linux/msdos_fs.h | 36 ++++++++++++++++++++++++++++-------- 2 files changed, 32 insertions(+), 12 deletions(-) (limited to 'include/uapi/linux') diff --git a/fs/fat/inode.c b/fs/fat/inode.c index f8f491677a4a..4b4d4ef910f3 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -1298,17 +1298,17 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, int isvfat, sbi->prev_free = FAT_START_ENT; sb->s_maxbytes = 0xffffffff; - if (!sbi->fat_length && b->fat32_length) { + if (!sbi->fat_length && b->fat32.length) { struct fat_boot_fsinfo *fsinfo; struct buffer_head *fsinfo_bh; /* Must be FAT32 */ sbi->fat_bits = 32; - sbi->fat_length = le32_to_cpu(b->fat32_length); - sbi->root_cluster = le32_to_cpu(b->root_cluster); + sbi->fat_length = le32_to_cpu(b->fat32.length); + sbi->root_cluster = le32_to_cpu(b->fat32.root_cluster); /* MC - if info_sector is 0, don't multiply by 0 */ - sbi->fsinfo_sector = le16_to_cpu(b->info_sector); + sbi->fsinfo_sector = le16_to_cpu(b->fat32.info_sector); if (sbi->fsinfo_sector == 0) sbi->fsinfo_sector = 1; diff --git a/include/uapi/linux/msdos_fs.h b/include/uapi/linux/msdos_fs.h index 996719f82e28..b9f12450efe8 100644 --- a/include/uapi/linux/msdos_fs.h +++ b/include/uapi/linux/msdos_fs.h @@ -120,14 +120,34 @@ struct fat_boot_sector { __le32 hidden; /* hidden sectors (unused) */ __le32 total_sect; /* number of sectors (if sectors == 0) */ - /* The following fields are only used by FAT32 */ - __le32 fat32_length; /* sectors/FAT */ - __le16 flags; /* bit 8: fat mirroring, low 4: active fat */ - __u8 version[2]; /* major, minor filesystem version */ - __le32 root_cluster; /* first cluster in root directory */ - __le16 info_sector; /* filesystem info sector */ - __le16 backup_boot; /* backup boot sector */ - __le16 reserved2[6]; /* Unused */ + union { + struct { + /* Extended BPB Fields for FAT16 */ + __u8 drive_number; /* Physical drive number */ + __u8 state; /* undocumented, but used + for mount state. */ + /* other fiealds are not added here */ + } fat16; + + struct { + /* only used by FAT32 */ + __le32 length; /* sectors/FAT */ + __le16 flags; /* bit 8: fat mirroring, + low 4: active fat */ + __u8 version[2]; /* major, minor filesystem + version */ + __le32 root_cluster; /* first cluster in + root directory */ + __le16 info_sector; /* filesystem info sector */ + __le16 backup_boot; /* backup boot sector */ + __le16 reserved2[6]; /* Unused */ + /* Extended BPB Fields for FAT32 */ + __u8 drive_number; /* Physical drive number */ + __u8 state; /* undocumented, but used + for mount state. */ + /* other fiealds are not added here */ + } fat32; + }; }; struct fat_boot_fsinfo { -- cgit v1.2.3 From b88a105802e9aeb6e234e8106659f5d1271081bb Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Wed, 27 Feb 2013 17:03:09 -0800 Subject: fat: mark fs as dirty on mount and clean on umount There is no documented methods to mark FAT as dirty. Unofficially MS started to use reserved Byte in boot sector for this purpose, at least since Win 2000. With Win 7 user is warned if fs is dirty and asked to clean it. Different versions of Win, handle it in different ways, but always have same meaning: - Win 2000 and XP, set it on write operations and remove it after operation was finnished - Win 7, set dirty flag on first write and remove it on umount. We will do it as follows: - set dirty flag on mount. If fs was initially dirty, warn user, remember it and do not do any changes to boot sector. - clean it on umount. If fs was initially dirty, leave it dirty. - do not do any thing if fs mounted read-only. - TODO: leave fs dirty if we found some error after mount. Signed-off-by: Oleksij Rempel Signed-off-by: OGAWA Hirofumi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/fat/fat.h | 2 ++ fs/fat/inode.c | 66 +++++++++++++++++++++++++++++++++++++++++++ include/uapi/linux/msdos_fs.h | 2 ++ 3 files changed, 70 insertions(+) (limited to 'include/uapi/linux') diff --git a/fs/fat/fat.h b/fs/fat/fat.h index 12701a567752..e9cc3f0d58e2 100644 --- a/fs/fat/fat.h +++ b/fs/fat/fat.h @@ -95,6 +95,8 @@ struct msdos_sb_info { spinlock_t dir_hash_lock; struct hlist_head dir_hashtable[FAT_HASH_SIZE]; + + unsigned int dirty; /* fs state before mount */ }; #define FAT_CACHE_VALID 0 /* special case for valid cache */ diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 4b4d4ef910f3..780e20806346 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -488,10 +488,59 @@ static void fat_evict_inode(struct inode *inode) fat_detach(inode); } +static void fat_set_state(struct super_block *sb, + unsigned int set, unsigned int force) +{ + struct buffer_head *bh; + struct fat_boot_sector *b; + struct msdos_sb_info *sbi = sb->s_fs_info; + + /* do not change any thing if mounted read only */ + if ((sb->s_flags & MS_RDONLY) && !force) + return; + + /* do not change state if fs was dirty */ + if (sbi->dirty) { + /* warn only on set (mount). */ + if (set) + fat_msg(sb, KERN_WARNING, "Volume was not properly " + "unmounted. Some data may be corrupt. " + "Please run fsck."); + return; + } + + bh = sb_bread(sb, 0); + if (bh == NULL) { + fat_msg(sb, KERN_ERR, "unable to read boot sector " + "to mark fs as dirty"); + return; + } + + b = (struct fat_boot_sector *) bh->b_data; + + if (sbi->fat_bits == 32) { + if (set) + b->fat32.state |= FAT_STATE_DIRTY; + else + b->fat32.state &= ~FAT_STATE_DIRTY; + } else /* fat 16 and 12 */ { + if (set) + b->fat16.state |= FAT_STATE_DIRTY; + else + b->fat16.state &= ~FAT_STATE_DIRTY; + } + + mark_buffer_dirty(bh); + sync_dirty_buffer(bh); + brelse(bh); +} + static void fat_put_super(struct super_block *sb) { struct msdos_sb_info *sbi = MSDOS_SB(sb); + fat_set_state(sb, 0, 0); + iput(sbi->fsinfo_inode); iput(sbi->fat_inode); @@ -566,8 +615,18 @@ static void __exit fat_destroy_inodecache(void) static int fat_remount(struct super_block *sb, int *flags, char *data) { + int new_rdonly; struct msdos_sb_info *sbi = MSDOS_SB(sb); *flags |= MS_NODIRATIME | (sbi->options.isvfat ? 0 : MS_NOATIME); + + /* make sure we update state on remount. */ + new_rdonly = *flags & MS_RDONLY; + if (new_rdonly != (sb->s_flags & MS_RDONLY)) { + if (new_rdonly) + fat_set_state(sb, 0, 0); + else + fat_set_state(sb, 1, 1); + } return 0; } @@ -1362,6 +1421,12 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, int isvfat, if (sbi->fat_bits != 32) sbi->fat_bits = (total_clusters > MAX_FAT12) ? 16 : 12; + /* some OSes set FAT_STATE_DIRTY and clean it on unmount. */ + if (sbi->fat_bits == 32) + sbi->dirty = b->fat32.state & FAT_STATE_DIRTY; + else /* fat 16 or 12 */ + sbi->dirty = b->fat16.state & FAT_STATE_DIRTY; + /* check that FAT table does not overflow */ fat_clusters = sbi->fat_length * sb->s_blocksize * 8 / sbi->fat_bits; total_clusters = min(total_clusters, fat_clusters - FAT_START_ENT); @@ -1456,6 +1521,7 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, int isvfat, "the device does not support discard"); } + fat_set_state(sb, 1, 0); return 0; out_invalid: diff --git a/include/uapi/linux/msdos_fs.h b/include/uapi/linux/msdos_fs.h index b9f12450efe8..f055e58b3147 100644 --- a/include/uapi/linux/msdos_fs.h +++ b/include/uapi/linux/msdos_fs.h @@ -87,6 +87,8 @@ #define IS_FSINFO(x) (le32_to_cpu((x)->signature1) == FAT_FSINFO_SIG1 \ && le32_to_cpu((x)->signature2) == FAT_FSINFO_SIG2) +#define FAT_STATE_DIRTY 0x01 + struct __fat_dirent { long d_ino; __kernel_off_t d_off; -- cgit v1.2.3 From 59fb1b9f5d9910c2eb97107dd0eb7e3bce8f0dde Mon Sep 17 00:00:00 2001 From: "Robert P. J. Day" Date: Wed, 27 Feb 2013 17:05:11 -0800 Subject: ipmi: remove superfluous kernel/userspace explanation Given the obvious distinction between kernel and userspace supported by uapi/, it seems unnecessary to comment on that. Signed-off-by: Robert P. J. Day Signed-off-by: Corey Minyard Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/ipmi.h | 4 ---- include/uapi/linux/ipmi.h | 10 +--------- 2 files changed, 1 insertion(+), 13 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/linux/ipmi.h b/include/linux/ipmi.h index 1487e7906bbd..1f9f56e28851 100644 --- a/include/linux/ipmi.h +++ b/include/linux/ipmi.h @@ -35,10 +35,6 @@ #include - -/* - * The in-kernel interface. - */ #include #include diff --git a/include/uapi/linux/ipmi.h b/include/uapi/linux/ipmi.h index 33fbc99b3812..7b26a62e5707 100644 --- a/include/uapi/linux/ipmi.h +++ b/include/uapi/linux/ipmi.h @@ -59,15 +59,7 @@ * if it becomes full and it is queried once a second to see if * anything is in it. Incoming commands to the driver will get * delivered as commands. - * - * This driver provides two main interfaces: one for in-kernel - * applications and another for userland applications. The - * capabilities are basically the same for both interface, although - * the interfaces are somewhat different. The stuff in the - * #ifdef __KERNEL__ below is the in-kernel interface. The userland - * interface is defined later in the file. */ - - + */ /* * This is an overlay for all the address types, so it's easy to -- cgit v1.2.3 From 75f187aba5e7a3eea259041f85099029774a4c5b Mon Sep 17 00:00:00 2001 From: Alex Bligh Date: Wed, 27 Feb 2013 17:05:23 -0800 Subject: nbd: support FLUSH requests Currently, the NBD device does not accept flush requests from the Linux block layer. If the NBD server opened the target with neither O_SYNC nor O_DSYNC, however, the device will be effectively backed by a writeback cache. Without issuing flushes properly, operation of the NBD device will not be safe against power losses. The NBD protocol has support for both a cache flush command and a FUA command flag; the server will also pass a flag to note its support for these features. This patch adds support for the cache flush command and flag. In the kernel, we receive the flags via the NBD_SET_FLAGS ioctl, and map NBD_FLAG_SEND_FLUSH to the argument of blk_queue_flush. When the flag is active the block layer will send REQ_FLUSH requests, which we translate to NBD_CMD_FLUSH commands. FUA support is not included in this patch because all free software servers implement it with a full fdatasync; thus it has no advantage over supporting flush only. Because I [Paolo] cannot really benchmark it in a realistic scenario, I cannot tell if it is a good idea or not. It is also not clear if it is valid for an NBD server to support FUA but not flush. The Linux block layer gives a warning for this combination, the NBD protocol documentation says nothing about it. The patch also fixes a small problem in the handling of flags: nbd->flags must be cleared at the end of NBD_DO_IT, but the driver was not doing that. The bug manifests itself as follows. Suppose you two different client/server pairs to start the NBD device. Suppose also that the first client supports NBD_SET_FLAGS, and the first server sends NBD_FLAG_SEND_FLUSH; the second pair instead does neither of these two things. Before this patch, the second invocation of NBD_DO_IT will use a stale value of nbd->flags, and the second server will issue an error every time it receives an NBD_CMD_FLUSH command. This bug is pre-existing, but it becomes much more important after this patch; flush failures make the device pretty much unusable, unlike Signed-off-by: Paolo Bonzini Signed-off-by: Alex Bligh Acked-by: Paul Clements Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/block/nbd.c | 22 ++++++++++++++++++++-- include/uapi/linux/nbd.h | 3 ++- 2 files changed, 22 insertions(+), 3 deletions(-) (limited to 'include/uapi/linux') diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index ade146bf65e5..695c68fedd32 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -98,6 +98,7 @@ static const char *nbdcmd_to_ascii(int cmd) case NBD_CMD_READ: return "read"; case NBD_CMD_WRITE: return "write"; case NBD_CMD_DISC: return "disconnect"; + case NBD_CMD_FLUSH: return "flush"; case NBD_CMD_TRIM: return "trim/discard"; } return "invalid"; @@ -244,8 +245,15 @@ static int nbd_send_req(struct nbd_device *nbd, struct request *req) request.magic = htonl(NBD_REQUEST_MAGIC); request.type = htonl(nbd_cmd(req)); - request.from = cpu_to_be64((u64)blk_rq_pos(req) << 9); - request.len = htonl(size); + + if (nbd_cmd(req) == NBD_CMD_FLUSH) { + /* Other values are reserved for FLUSH requests. */ + request.from = 0; + request.len = 0; + } else { + request.from = cpu_to_be64((u64)blk_rq_pos(req) << 9); + request.len = htonl(size); + } memcpy(request.handle, &req, sizeof(req)); dprintk(DBG_TX, "%s: request %p: sending control (%s@%llu,%uB)\n", @@ -482,6 +490,11 @@ static void nbd_handle_req(struct nbd_device *nbd, struct request *req) } } + if (req->cmd_flags & REQ_FLUSH) { + BUG_ON(unlikely(blk_rq_sectors(req))); + nbd_cmd(req) = NBD_CMD_FLUSH; + } + req->errors = 0; mutex_lock(&nbd->tx_lock); @@ -684,6 +697,10 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd, if (nbd->flags & NBD_FLAG_SEND_TRIM) queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, nbd->disk->queue); + if (nbd->flags & NBD_FLAG_SEND_FLUSH) + blk_queue_flush(nbd->disk->queue, REQ_FLUSH); + else + blk_queue_flush(nbd->disk->queue, 0); thread = kthread_create(nbd_thread, nbd, nbd->disk->disk_name); if (IS_ERR(thread)) { @@ -705,6 +722,7 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd, queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, nbd->disk->queue); if (file) fput(file); + nbd->flags = 0; nbd->bytesize = 0; bdev->bd_inode->i_size = 0; set_capacity(nbd->disk, 0); diff --git a/include/uapi/linux/nbd.h b/include/uapi/linux/nbd.h index dfb514472cbc..4f52549b23ff 100644 --- a/include/uapi/linux/nbd.h +++ b/include/uapi/linux/nbd.h @@ -33,13 +33,14 @@ enum { NBD_CMD_READ = 0, NBD_CMD_WRITE = 1, NBD_CMD_DISC = 2, - /* there is a gap here to match userspace */ + NBD_CMD_FLUSH = 3, NBD_CMD_TRIM = 4 }; /* values for flags field */ #define NBD_FLAG_HAS_FLAGS (1 << 0) /* nbd-server supports flags */ #define NBD_FLAG_READ_ONLY (1 << 1) /* device is read-only */ +#define NBD_FLAG_SEND_FLUSH (1 << 2) /* can flush writeback cache */ /* there is a gap here to match userspace */ #define NBD_FLAG_SEND_TRIM (1 << 5) /* send trim/discard */ -- cgit v1.2.3 From 02cde50b7ea74557d32ff778c73809322445ccd2 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Fri, 1 Mar 2013 22:45:49 +0000 Subject: dm ioctl: optimize functions without variable params Device-mapper ioctls receive and send data in a buffer supplied by userspace. The buffer has two parts. The first part contains a 'struct dm_ioctl' and has a fixed size. The second part depends on the ioctl and has a variable size. This patch recognises the specific ioctls that do not use the variable part of the buffer and skips allocating memory for it. In particular, when a device is suspended and a resume ioctl is sent, this now avoid memory allocation completely. The variable "struct dm_ioctl tmp" is moved from the function copy_params to its caller ctl_ioctl and renamed to param_kernel. It is used directly when the ioctl function doesn't need any arguments. Signed-off-by: Mikulas Patocka Signed-off-by: Alasdair G Kergon --- drivers/md/dm-ioctl.c | 52 ++++++++++++++++++++++++++++--------------- include/uapi/linux/dm-ioctl.h | 6 ++--- 2 files changed, 37 insertions(+), 21 deletions(-) (limited to 'include/uapi/linux') diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c index 9ae11b2994f8..7eb0682d574f 100644 --- a/drivers/md/dm-ioctl.c +++ b/drivers/md/dm-ioctl.c @@ -1560,7 +1560,8 @@ static int check_version(unsigned int cmd, struct dm_ioctl __user *user) return r; } -#define DM_PARAMS_VMALLOC 0x0001 /* Params alloced with vmalloc not kmalloc */ +#define DM_PARAMS_KMALLOC 0x0001 /* Params alloced with kmalloc */ +#define DM_PARAMS_VMALLOC 0x0002 /* Params alloced with vmalloc */ #define DM_WIPE_BUFFER 0x0010 /* Wipe input buffer before returning from ioctl */ static void free_params(struct dm_ioctl *param, size_t param_size, int param_flags) @@ -1568,66 +1569,80 @@ static void free_params(struct dm_ioctl *param, size_t param_size, int param_fla if (param_flags & DM_WIPE_BUFFER) memset(param, 0, param_size); + if (param_flags & DM_PARAMS_KMALLOC) + kfree(param); if (param_flags & DM_PARAMS_VMALLOC) vfree(param); - else - kfree(param); } -static int copy_params(struct dm_ioctl __user *user, struct dm_ioctl **param, int *param_flags) +static int copy_params(struct dm_ioctl __user *user, struct dm_ioctl *param_kernel, + int ioctl_flags, + struct dm_ioctl **param, int *param_flags) { - struct dm_ioctl tmp, *dmi; + struct dm_ioctl *dmi; int secure_data; + const size_t minimum_data_size = sizeof(*param_kernel) - sizeof(param_kernel->data); - if (copy_from_user(&tmp, user, sizeof(tmp) - sizeof(tmp.data))) + if (copy_from_user(param_kernel, user, minimum_data_size)) return -EFAULT; - if (tmp.data_size < (sizeof(tmp) - sizeof(tmp.data))) + if (param_kernel->data_size < minimum_data_size) return -EINVAL; - secure_data = tmp.flags & DM_SECURE_DATA_FLAG; + secure_data = param_kernel->flags & DM_SECURE_DATA_FLAG; *param_flags = secure_data ? DM_WIPE_BUFFER : 0; + if (ioctl_flags & IOCTL_FLAGS_NO_PARAMS) { + dmi = param_kernel; + dmi->data_size = minimum_data_size; + goto data_copied; + } + /* * Try to avoid low memory issues when a device is suspended. * Use kmalloc() rather than vmalloc() when we can. */ dmi = NULL; - if (tmp.data_size <= KMALLOC_MAX_SIZE) - dmi = kmalloc(tmp.data_size, GFP_NOIO | __GFP_NORETRY | __GFP_NOMEMALLOC | __GFP_NOWARN); + if (param_kernel->data_size <= KMALLOC_MAX_SIZE) { + dmi = kmalloc(param_kernel->data_size, GFP_NOIO | __GFP_NORETRY | __GFP_NOMEMALLOC | __GFP_NOWARN); + if (dmi) + *param_flags |= DM_PARAMS_KMALLOC; + } if (!dmi) { - dmi = __vmalloc(tmp.data_size, GFP_NOIO | __GFP_REPEAT | __GFP_HIGH, PAGE_KERNEL); - *param_flags |= DM_PARAMS_VMALLOC; + dmi = __vmalloc(param_kernel->data_size, GFP_NOIO | __GFP_REPEAT | __GFP_HIGH, PAGE_KERNEL); + if (dmi) + *param_flags |= DM_PARAMS_VMALLOC; } if (!dmi) { - if (secure_data && clear_user(user, tmp.data_size)) + if (secure_data && clear_user(user, param_kernel->data_size)) return -EFAULT; return -ENOMEM; } - if (copy_from_user(dmi, user, tmp.data_size)) + if (copy_from_user(dmi, user, param_kernel->data_size)) goto bad; +data_copied: /* * Abort if something changed the ioctl data while it was being copied. */ - if (dmi->data_size != tmp.data_size) { + if (dmi->data_size != param_kernel->data_size) { DMERR("rejecting ioctl: data size modified while processing parameters"); goto bad; } /* Wipe the user buffer so we do not return it to userspace */ - if (secure_data && clear_user(user, tmp.data_size)) + if (secure_data && clear_user(user, param_kernel->data_size)) goto bad; *param = dmi; return 0; bad: - free_params(dmi, tmp.data_size, *param_flags); + free_params(dmi, param_kernel->data_size, *param_flags); return -EFAULT; } @@ -1671,6 +1686,7 @@ static int ctl_ioctl(uint command, struct dm_ioctl __user *user) struct dm_ioctl *uninitialized_var(param); ioctl_fn fn = NULL; size_t input_param_size; + struct dm_ioctl param_kernel; /* only root can play with this */ if (!capable(CAP_SYS_ADMIN)) @@ -1704,7 +1720,7 @@ static int ctl_ioctl(uint command, struct dm_ioctl __user *user) /* * Copy the parameters into kernel space. */ - r = copy_params(user, ¶m, ¶m_flags); + r = copy_params(user, ¶m_kernel, ioctl_flags, ¶m, ¶m_flags); if (r) return r; diff --git a/include/uapi/linux/dm-ioctl.h b/include/uapi/linux/dm-ioctl.h index 539b179b349c..b8a6bddf0727 100644 --- a/include/uapi/linux/dm-ioctl.h +++ b/include/uapi/linux/dm-ioctl.h @@ -267,9 +267,9 @@ enum { #define DM_DEV_SET_GEOMETRY _IOWR(DM_IOCTL, DM_DEV_SET_GEOMETRY_CMD, struct dm_ioctl) #define DM_VERSION_MAJOR 4 -#define DM_VERSION_MINOR 23 -#define DM_VERSION_PATCHLEVEL 1 -#define DM_VERSION_EXTRA "-ioctl (2012-12-18)" +#define DM_VERSION_MINOR 24 +#define DM_VERSION_PATCHLEVEL 0 +#define DM_VERSION_EXTRA "-ioctl (2013-01-15)" /* Status bits */ #define DM_READONLY_FLAG (1 << 0) /* In/Out */ -- cgit v1.2.3 From a26062416ef8add48f16fbadded2b5f6fb84d024 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Fri, 1 Mar 2013 22:45:49 +0000 Subject: dm ioctl: allow message to return data This patch introduces enhanced message support that allows the device-mapper core to recognise messages that are common to all devices, and for messages to return data to userspace. Core messages are processed by the function "message_for_md". If the device mapper doesn't support the message, it is passed to the target driver. If the message returns data, the kernel sets the flag DM_MESSAGE_OUT_FLAG. Signed-off-by: Mikulas Patocka Signed-off-by: Alasdair G Kergon --- drivers/md/dm-ioctl.c | 36 +++++++++++++++++++++++++++++++++++- include/uapi/linux/dm-ioctl.h | 5 +++++ 2 files changed, 40 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c index 7eb0682d574f..aa04f0224642 100644 --- a/drivers/md/dm-ioctl.c +++ b/drivers/md/dm-ioctl.c @@ -1414,6 +1414,22 @@ static int table_status(struct dm_ioctl *param, size_t param_size) return 0; } +static bool buffer_test_overflow(char *result, unsigned maxlen) +{ + return !maxlen || strlen(result) + 1 >= maxlen; +} + +/* + * Process device-mapper dependent messages. + * Returns a number <= 1 if message was processed by device mapper. + * Returns 2 if message should be delivered to the target. + */ +static int message_for_md(struct mapped_device *md, unsigned argc, char **argv, + char *result, unsigned maxlen) +{ + return 2; +} + /* * Pass a message to the target that's at the supplied device offset. */ @@ -1425,6 +1441,8 @@ static int target_message(struct dm_ioctl *param, size_t param_size) struct dm_table *table; struct dm_target *ti; struct dm_target_msg *tmsg = (void *) param + param->data_start; + size_t maxlen; + char *result = get_result_buffer(param, param_size, &maxlen); md = find_device(param); if (!md) @@ -1448,6 +1466,10 @@ static int target_message(struct dm_ioctl *param, size_t param_size) goto out_argv; } + r = message_for_md(md, argc, argv, result, maxlen); + if (r <= 1) + goto out_argv; + table = dm_get_live_table(md); if (!table) goto out_argv; @@ -1473,7 +1495,18 @@ static int target_message(struct dm_ioctl *param, size_t param_size) out_argv: kfree(argv); out: - param->data_size = 0; + if (r >= 0) + __dev_status(md, param); + + if (r == 1) { + param->flags |= DM_DATA_OUT_FLAG; + if (buffer_test_overflow(result, maxlen)) + param->flags |= DM_BUFFER_FULL_FLAG; + else + param->data_size = param->data_start + strlen(result) + 1; + r = 0; + } + dm_put(md); return r; } @@ -1653,6 +1686,7 @@ static int validate_params(uint cmd, struct dm_ioctl *param) param->flags &= ~DM_BUFFER_FULL_FLAG; param->flags &= ~DM_UEVENT_GENERATED_FLAG; param->flags &= ~DM_SECURE_DATA_FLAG; + param->flags &= ~DM_DATA_OUT_FLAG; /* Ignores parameters */ if (cmd == DM_REMOVE_ALL_CMD || diff --git a/include/uapi/linux/dm-ioctl.h b/include/uapi/linux/dm-ioctl.h index b8a6bddf0727..7e75b6fd8d45 100644 --- a/include/uapi/linux/dm-ioctl.h +++ b/include/uapi/linux/dm-ioctl.h @@ -336,4 +336,9 @@ enum { */ #define DM_SECURE_DATA_FLAG (1 << 15) /* In */ +/* + * If set, a message generated output data. + */ +#define DM_DATA_OUT_FLAG (1 << 16) /* Out */ + #endif /* _LINUX_DM_IOCTL_H */ -- cgit v1.2.3 From bc3966bf1583a6c22b76397535174445c43952de Mon Sep 17 00:00:00 2001 From: James Hogan Date: Tue, 9 Oct 2012 10:54:36 +0100 Subject: metag: ptrace The ptrace interface for metag provides access to some core register sets using the PTRACE_GETREGSET and PTRACE_SETREGSET operations. The details of the internal context structures is abstracted into user API structures to both ease use and allow flexibility to change the internal context layouts. Copyin and copyout functions for these register sets are exposed to allow signal handling code to use them to copy to and from the signal context. struct user_gp_regs (NT_PRSTATUS) provides access to the core general purpose register context. struct user_cb_regs (NT_METAG_CBUF) provides access to the TXCATCH* registers which contains information abuot a memory fault, unaligned access error or watchpoint. This can be modified to alter the way the fault is replayed on resume ("catch replay"), or to prevent the replay taking place. struct user_rp_state (NT_METAG_RPIPE) provides access to the state of the Meta read pipeline which can be used to hide memory latencies in hand optimised data loops. Extended DSP register state, DSP RAM, and hardware breakpoint registers aren't yet exposed through ptrace. Signed-off-by: James Hogan Cc: Andrew Morton Cc: Denys Vlasenko Cc: Arnd Bergmann Cc: Tony Lindgren Cc: "Paul E. McKenney" --- arch/metag/include/asm/ptrace.h | 60 ++++++ arch/metag/include/uapi/asm/ptrace.h | 113 +++++++++++ arch/metag/kernel/ptrace.c | 380 +++++++++++++++++++++++++++++++++++ include/uapi/linux/elf.h | 2 + 4 files changed, 555 insertions(+) create mode 100644 arch/metag/include/asm/ptrace.h create mode 100644 arch/metag/include/uapi/asm/ptrace.h create mode 100644 arch/metag/kernel/ptrace.c (limited to 'include/uapi/linux') diff --git a/arch/metag/include/asm/ptrace.h b/arch/metag/include/asm/ptrace.h new file mode 100644 index 000000000000..fcabc18daf25 --- /dev/null +++ b/arch/metag/include/asm/ptrace.h @@ -0,0 +1,60 @@ +#ifndef _METAG_PTRACE_H +#define _METAG_PTRACE_H + +#include +#include +#include + +#ifndef __ASSEMBLY__ + +/* this struct defines the way the registers are stored on the + stack during a system call. */ + +struct pt_regs { + TBICTX ctx; + TBICTXEXTCB0 extcb0[5]; +}; + +#define user_mode(regs) (((regs)->ctx.SaveMask & TBICTX_PRIV_BIT) > 0) + +#define instruction_pointer(regs) ((unsigned long)(regs)->ctx.CurrPC) +#define profile_pc(regs) instruction_pointer(regs) + +#define task_pt_regs(task) \ + ((struct pt_regs *)(task_stack_page(task) + \ + sizeof(struct thread_info))) + +#define current_pt_regs() \ + ((struct pt_regs *)((char *)current_thread_info() + \ + sizeof(struct thread_info))) + +int syscall_trace_enter(struct pt_regs *regs); +void syscall_trace_leave(struct pt_regs *regs); + +/* copy a struct user_gp_regs out to user */ +int metag_gp_regs_copyout(const struct pt_regs *regs, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf); +/* copy a struct user_gp_regs in from user */ +int metag_gp_regs_copyin(struct pt_regs *regs, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf); +/* copy a struct user_cb_regs out to user */ +int metag_cb_regs_copyout(const struct pt_regs *regs, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf); +/* copy a struct user_cb_regs in from user */ +int metag_cb_regs_copyin(struct pt_regs *regs, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf); +/* copy a struct user_rp_state out to user */ +int metag_rp_state_copyout(const struct pt_regs *regs, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf); +/* copy a struct user_rp_state in from user */ +int metag_rp_state_copyin(struct pt_regs *regs, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf); + +#endif /* __ASSEMBLY__ */ +#endif /* _METAG_PTRACE_H */ diff --git a/arch/metag/include/uapi/asm/ptrace.h b/arch/metag/include/uapi/asm/ptrace.h new file mode 100644 index 000000000000..45d97809d33e --- /dev/null +++ b/arch/metag/include/uapi/asm/ptrace.h @@ -0,0 +1,113 @@ +#ifndef _UAPI_METAG_PTRACE_H +#define _UAPI_METAG_PTRACE_H + +#ifndef __ASSEMBLY__ + +/* + * These are the layouts of the regsets returned by the GETREGSET ptrace call + */ + +/* user_gp_regs::status */ + +/* CBMarker bit (indicates catch state / catch replay) */ +#define USER_GP_REGS_STATUS_CATCH_BIT (1 << 22) +#define USER_GP_REGS_STATUS_CATCH_S 22 +/* LSM_STEP field (load/store multiple step) */ +#define USER_GP_REGS_STATUS_LSM_STEP_BITS (0x7 << 8) +#define USER_GP_REGS_STATUS_LSM_STEP_S 8 +/* SCC bit (indicates split 16x16 condition flags) */ +#define USER_GP_REGS_STATUS_SCC_BIT (1 << 4) +#define USER_GP_REGS_STATUS_SCC_S 4 + +/* normal condition flags */ +/* CF_Z bit (Zero flag) */ +#define USER_GP_REGS_STATUS_CF_Z_BIT (1 << 3) +#define USER_GP_REGS_STATUS_CF_Z_S 3 +/* CF_N bit (Negative flag) */ +#define USER_GP_REGS_STATUS_CF_N_BIT (1 << 2) +#define USER_GP_REGS_STATUS_CF_N_S 2 +/* CF_V bit (oVerflow flag) */ +#define USER_GP_REGS_STATUS_CF_V_BIT (1 << 1) +#define USER_GP_REGS_STATUS_CF_V_S 1 +/* CF_C bit (Carry flag) */ +#define USER_GP_REGS_STATUS_CF_C_BIT (1 << 0) +#define USER_GP_REGS_STATUS_CF_C_S 0 + +/* split 16x16 condition flags */ +/* SCF_LZ bit (Low Zero flag) */ +#define USER_GP_REGS_STATUS_SCF_LZ_BIT (1 << 3) +#define USER_GP_REGS_STATUS_SCF_LZ_S 3 +/* SCF_HZ bit (High Zero flag) */ +#define USER_GP_REGS_STATUS_SCF_HZ_BIT (1 << 2) +#define USER_GP_REGS_STATUS_SCF_HZ_S 2 +/* SCF_HC bit (High Carry flag) */ +#define USER_GP_REGS_STATUS_SCF_HC_BIT (1 << 1) +#define USER_GP_REGS_STATUS_SCF_HC_S 1 +/* SCF_LC bit (Low Carry flag) */ +#define USER_GP_REGS_STATUS_SCF_LC_BIT (1 << 0) +#define USER_GP_REGS_STATUS_SCF_LC_S 0 + +/** + * struct user_gp_regs - User general purpose registers + * @dx: GP data unit regs (dx[reg][unit] = D{unit:0-1}.{reg:0-7}) + * @ax: GP address unit regs (ax[reg][unit] = A{unit:0-1}.{reg:0-3}) + * @pc: PC register + * @status: TXSTATUS register (condition flags, LSM_STEP etc) + * @rpt: TXRPT registers (branch repeat counter) + * @bpobits: TXBPOBITS register ("branch prediction other" bits) + * @mode: TXMODE register + * @_pad1: Reserved padding to make sizeof obviously 64bit aligned + * + * This is the user-visible general purpose register state structure. + * + * It can be accessed through PTRACE_GETREGSET with NT_PRSTATUS. + * + * It is also used in the signal context. + */ +struct user_gp_regs { + unsigned long dx[8][2]; + unsigned long ax[4][2]; + unsigned long pc; + unsigned long status; + unsigned long rpt; + unsigned long bpobits; + unsigned long mode; + unsigned long _pad1; +}; + +/** + * struct user_cb_regs - User catch buffer registers + * @flags: TXCATCH0 register (fault flags) + * @addr: TXCATCH1 register (fault address) + * @data: TXCATCH2 and TXCATCH3 registers (low and high data word) + * + * This is the user-visible catch buffer register state structure containing + * information about a failed memory access, and allowing the access to be + * modified and replayed. + * + * It can be accessed through PTRACE_GETREGSET with NT_METAG_CBUF. + */ +struct user_cb_regs { + unsigned long flags; + unsigned long addr; + unsigned long long data; +}; + +/** + * struct user_rp_state - User read pipeline state + * @entries: Read pipeline entries + * @mask: Mask of valid pipeline entries (RPMask from TXDIVTIME register) + * + * This is the user-visible read pipeline state structure containing the entries + * currently in the read pipeline and the mask of valid entries. + * + * It can be accessed through PTRACE_GETREGSET with NT_METAG_RPIPE. + */ +struct user_rp_state { + unsigned long long entries[6]; + unsigned long mask; +}; + +#endif /* __ASSEMBLY__ */ + +#endif /* _UAPI_METAG_PTRACE_H */ diff --git a/arch/metag/kernel/ptrace.c b/arch/metag/kernel/ptrace.c new file mode 100644 index 000000000000..47a8828615a5 --- /dev/null +++ b/arch/metag/kernel/ptrace.c @@ -0,0 +1,380 @@ +/* + * Copyright (C) 2005-2012 Imagination Technologies Ltd. + * + * This file is subject to the terms and conditions of the GNU General + * Public License. See the file COPYING in the main directory of + * this archive for more details. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define CREATE_TRACE_POINTS +#include + +/* + * user_regset definitions. + */ + +int metag_gp_regs_copyout(const struct pt_regs *regs, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + const void *ptr; + unsigned long data; + int ret; + + /* D{0-1}.{0-7} */ + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, + regs->ctx.DX, 0, 4*16); + if (ret) + goto out; + /* A{0-1}.{0-1} */ + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, + regs->ctx.AX, 4*16, 4*20); + if (ret) + goto out; + /* A{0-1}.2 */ + if (regs->ctx.SaveMask & TBICTX_XEXT_BIT) + ptr = regs->ctx.Ext.Ctx.pExt; + else + ptr = ®s->ctx.Ext.AX2; + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, + ptr, 4*20, 4*22); + if (ret) + goto out; + /* A{0-1}.3 */ + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, + ®s->ctx.AX3, 4*22, 4*24); + if (ret) + goto out; + /* PC */ + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, + ®s->ctx.CurrPC, 4*24, 4*25); + if (ret) + goto out; + /* TXSTATUS */ + data = (unsigned long)regs->ctx.Flags; + if (regs->ctx.SaveMask & TBICTX_CBUF_BIT) + data |= USER_GP_REGS_STATUS_CATCH_BIT; + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, + &data, 4*25, 4*26); + if (ret) + goto out; + /* TXRPT, TXBPOBITS, TXMODE */ + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, + ®s->ctx.CurrRPT, 4*26, 4*29); + if (ret) + goto out; + /* Padding */ + ret = user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf, + 4*29, 4*30); +out: + return ret; +} + +int metag_gp_regs_copyin(struct pt_regs *regs, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + void *ptr; + unsigned long data; + int ret; + + /* D{0-1}.{0-7} */ + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + regs->ctx.DX, 0, 4*16); + if (ret) + goto out; + /* A{0-1}.{0-1} */ + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + regs->ctx.AX, 4*16, 4*20); + if (ret) + goto out; + /* A{0-1}.2 */ + if (regs->ctx.SaveMask & TBICTX_XEXT_BIT) + ptr = regs->ctx.Ext.Ctx.pExt; + else + ptr = ®s->ctx.Ext.AX2; + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + ptr, 4*20, 4*22); + if (ret) + goto out; + /* A{0-1}.3 */ + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + ®s->ctx.AX3, 4*22, 4*24); + if (ret) + goto out; + /* PC */ + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + ®s->ctx.CurrPC, 4*24, 4*25); + if (ret) + goto out; + /* TXSTATUS */ + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &data, 4*25, 4*26); + if (ret) + goto out; + regs->ctx.Flags = data & 0xffff; + if (data & USER_GP_REGS_STATUS_CATCH_BIT) + regs->ctx.SaveMask |= TBICTX_XCBF_BIT | TBICTX_CBUF_BIT; + else + regs->ctx.SaveMask &= ~TBICTX_CBUF_BIT; + /* TXRPT, TXBPOBITS, TXMODE */ + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + ®s->ctx.CurrRPT, 4*26, 4*29); +out: + return ret; +} + +static int metag_gp_regs_get(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + const struct pt_regs *regs = task_pt_regs(target); + return metag_gp_regs_copyout(regs, pos, count, kbuf, ubuf); +} + +static int metag_gp_regs_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + struct pt_regs *regs = task_pt_regs(target); + return metag_gp_regs_copyin(regs, pos, count, kbuf, ubuf); +} + +int metag_cb_regs_copyout(const struct pt_regs *regs, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + int ret; + + /* TXCATCH{0-3} */ + if (regs->ctx.SaveMask & TBICTX_XCBF_BIT) + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, + regs->extcb0, 0, 4*4); + else + ret = user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf, + 0, 4*4); + return ret; +} + +int metag_cb_regs_copyin(struct pt_regs *regs, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + int ret; + + /* TXCATCH{0-3} */ + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + regs->extcb0, 0, 4*4); + return ret; +} + +static int metag_cb_regs_get(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + const struct pt_regs *regs = task_pt_regs(target); + return metag_cb_regs_copyout(regs, pos, count, kbuf, ubuf); +} + +static int metag_cb_regs_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + struct pt_regs *regs = task_pt_regs(target); + return metag_cb_regs_copyin(regs, pos, count, kbuf, ubuf); +} + +int metag_rp_state_copyout(const struct pt_regs *regs, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + unsigned long mask; + u64 *ptr; + int ret, i; + + /* Empty read pipeline */ + if (!(regs->ctx.SaveMask & TBICTX_CBRP_BIT)) { + ret = user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf, + 0, 4*13); + goto out; + } + + mask = (regs->ctx.CurrDIVTIME & TXDIVTIME_RPMASK_BITS) >> + TXDIVTIME_RPMASK_S; + + /* Read pipeline entries */ + ptr = (void *)®s->extcb0[1]; + for (i = 0; i < 6; ++i, ++ptr) { + if (mask & (1 << i)) + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, + ptr, 8*i, 8*(i + 1)); + else + ret = user_regset_copyout_zero(&pos, &count, &kbuf, + &ubuf, 8*i, 8*(i + 1)); + if (ret) + goto out; + } + /* Mask of entries */ + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, + &mask, 4*12, 4*13); +out: + return ret; +} + +int metag_rp_state_copyin(struct pt_regs *regs, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + struct user_rp_state rp; + unsigned long long *ptr; + int ret, i; + + /* Read the entire pipeline before making any changes */ + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &rp, 0, 4*13); + if (ret) + goto out; + + /* Write pipeline entries */ + ptr = (void *)®s->extcb0[1]; + for (i = 0; i < 6; ++i, ++ptr) + if (rp.mask & (1 << i)) + *ptr = rp.entries[i]; + + /* Update RPMask in TXDIVTIME */ + regs->ctx.CurrDIVTIME &= ~TXDIVTIME_RPMASK_BITS; + regs->ctx.CurrDIVTIME |= (rp.mask << TXDIVTIME_RPMASK_S) + & TXDIVTIME_RPMASK_BITS; + + /* Set/clear flags to indicate catch/read pipeline state */ + if (rp.mask) + regs->ctx.SaveMask |= TBICTX_XCBF_BIT | TBICTX_CBRP_BIT; + else + regs->ctx.SaveMask &= ~TBICTX_CBRP_BIT; +out: + return ret; +} + +static int metag_rp_state_get(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + const struct pt_regs *regs = task_pt_regs(target); + return metag_rp_state_copyout(regs, pos, count, kbuf, ubuf); +} + +static int metag_rp_state_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + struct pt_regs *regs = task_pt_regs(target); + return metag_rp_state_copyin(regs, pos, count, kbuf, ubuf); +} + +enum metag_regset { + REGSET_GENERAL, + REGSET_CBUF, + REGSET_READPIPE, +}; + +static const struct user_regset metag_regsets[] = { + [REGSET_GENERAL] = { + .core_note_type = NT_PRSTATUS, + .n = ELF_NGREG, + .size = sizeof(long), + .align = sizeof(long long), + .get = metag_gp_regs_get, + .set = metag_gp_regs_set, + }, + [REGSET_CBUF] = { + .core_note_type = NT_METAG_CBUF, + .n = sizeof(struct user_cb_regs) / sizeof(long), + .size = sizeof(long), + .align = sizeof(long long), + .get = metag_cb_regs_get, + .set = metag_cb_regs_set, + }, + [REGSET_READPIPE] = { + .core_note_type = NT_METAG_RPIPE, + .n = sizeof(struct user_rp_state) / sizeof(long), + .size = sizeof(long), + .align = sizeof(long long), + .get = metag_rp_state_get, + .set = metag_rp_state_set, + }, +}; + +static const struct user_regset_view user_metag_view = { + .name = "metag", + .e_machine = EM_METAG, + .regsets = metag_regsets, + .n = ARRAY_SIZE(metag_regsets) +}; + +const struct user_regset_view *task_user_regset_view(struct task_struct *task) +{ + return &user_metag_view; +} + +/* + * Called by kernel/ptrace.c when detaching.. + * + * Make sure single step bits etc are not set. + */ +void ptrace_disable(struct task_struct *child) +{ + /* nothing to do.. */ +} + +long arch_ptrace(struct task_struct *child, long request, unsigned long addr, + unsigned long data) +{ + int ret; + + switch (request) { + default: + ret = ptrace_request(child, request, addr, data); + break; + } + + return ret; +} + +int syscall_trace_enter(struct pt_regs *regs) +{ + int ret = 0; + + if (test_thread_flag(TIF_SYSCALL_TRACE)) + ret = tracehook_report_syscall_entry(regs); + + if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT))) + trace_sys_enter(regs, regs->ctx.DX[0].U1); + + return ret ? -1 : regs->ctx.DX[0].U1; +} + +void syscall_trace_leave(struct pt_regs *regs) +{ + if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT))) + trace_sys_exit(regs, regs->ctx.DX[0].U1); + + if (test_thread_flag(TIF_SYSCALL_TRACE)) + tracehook_report_syscall_exit(regs, 0); +} diff --git a/include/uapi/linux/elf.h b/include/uapi/linux/elf.h index 126a8175e3e2..eb164a298b98 100644 --- a/include/uapi/linux/elf.h +++ b/include/uapi/linux/elf.h @@ -395,6 +395,8 @@ typedef struct elf64_shdr { #define NT_ARM_TLS 0x401 /* ARM TLS register */ #define NT_ARM_HW_BREAK 0x402 /* ARM hardware breakpoint registers */ #define NT_ARM_HW_WATCH 0x403 /* ARM hardware watchpoint registers */ +#define NT_METAG_CBUF 0x500 /* Metag catch buffer registers */ +#define NT_METAG_RPIPE 0x501 /* Metag read pipeline state */ /* Note header in a PT_NOTE section */ -- cgit v1.2.3