| 1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
 | /******************************************************************************
 * blkif.h
 *
 * Unified block-device I/O interface for Xen guest OSes.
 *
 * Copyright (c) 2003-2004, Keir Fraser
 */
#ifndef __XEN_PUBLIC_IO_BLKIF_H__
#define __XEN_PUBLIC_IO_BLKIF_H__
#include <xen/interface/io/ring.h>
#include <xen/interface/grant_table.h>
/*
 * Front->back notifications: When enqueuing a new request, sending a
 * notification can be made conditional on req_event (i.e., the generic
 * hold-off mechanism provided by the ring macros). Backends must set
 * req_event appropriately (e.g., using RING_FINAL_CHECK_FOR_REQUESTS()).
 *
 * Back->front notifications: When enqueuing a new response, sending a
 * notification can be made conditional on rsp_event (i.e., the generic
 * hold-off mechanism provided by the ring macros). Frontends must set
 * rsp_event appropriately (e.g., using RING_FINAL_CHECK_FOR_RESPONSES()).
 */
typedef uint16_t blkif_vdev_t;
typedef uint64_t blkif_sector_t;
/*
 * REQUEST CODES.
 */
#define BLKIF_OP_READ              0
#define BLKIF_OP_WRITE             1
/*
 * Recognised only if "feature-barrier" is present in backend xenbus info.
 * The "feature_barrier" node contains a boolean indicating whether barrier
 * requests are likely to succeed or fail. Either way, a barrier request
 * may fail at any time with BLKIF_RSP_EOPNOTSUPP if it is unsupported by
 * the underlying block-device hardware. The boolean simply indicates whether
 * or not it is worthwhile for the frontend to attempt barrier requests.
 * If a backend does not recognise BLKIF_OP_WRITE_BARRIER, it should *not*
 * create the "feature-barrier" node!
 */
#define BLKIF_OP_WRITE_BARRIER     2
/*
 * Recognised if "feature-flush-cache" is present in backend xenbus
 * info.  A flush will ask the underlying storage hardware to flush its
 * non-volatile caches as appropriate.  The "feature-flush-cache" node
 * contains a boolean indicating whether flush requests are likely to
 * succeed or fail. Either way, a flush request may fail at any time
 * with BLKIF_RSP_EOPNOTSUPP if it is unsupported by the underlying
 * block-device hardware. The boolean simply indicates whether or not it
 * is worthwhile for the frontend to attempt flushes.  If a backend does
 * not recognise BLKIF_OP_WRITE_FLUSH_CACHE, it should *not* create the
 * "feature-flush-cache" node!
 */
#define BLKIF_OP_FLUSH_DISKCACHE   3
/*
 * Recognised only if "feature-discard" is present in backend xenbus info.
 * The "feature-discard" node contains a boolean indicating whether trim
 * (ATA) or unmap (SCSI) - conviently called discard requests are likely
 * to succeed or fail. Either way, a discard request
 * may fail at any time with BLKIF_RSP_EOPNOTSUPP if it is unsupported by
 * the underlying block-device hardware. The boolean simply indicates whether
 * or not it is worthwhile for the frontend to attempt discard requests.
 * If a backend does not recognise BLKIF_OP_DISCARD, it should *not*
 * create the "feature-discard" node!
 *
 * Discard operation is a request for the underlying block device to mark
 * extents to be erased. However, discard does not guarantee that the blocks
 * will be erased from the device - it is just a hint to the device
 * controller that these blocks are no longer in use. What the device
 * controller does with that information is left to the controller.
 * Discard operations are passed with sector_number as the
 * sector index to begin discard operations at and nr_sectors as the number of
 * sectors to be discarded. The specified sectors should be discarded if the
 * underlying block device supports trim (ATA) or unmap (SCSI) operations,
 * or a BLKIF_RSP_EOPNOTSUPP  should be returned.
 * More information about trim/unmap operations at:
 * http://t13.org/Documents/UploadedDocuments/docs2008/
 *     e07154r6-Data_Set_Management_Proposal_for_ATA-ACS2.doc
 * http://www.seagate.com/staticfiles/support/disc/manuals/
 *     Interface%20manuals/100293068c.pdf
 * The backend can optionally provide three extra XenBus attributes to
 * further optimize the discard functionality:
 * 'discard-aligment' - Devices that support discard functionality may
 * internally allocate space in units that are bigger than the exported
 * logical block size. The discard-alignment parameter indicates how many bytes
 * the beginning of the partition is offset from the internal allocation unit's
 * natural alignment.
 * 'discard-granularity'  - Devices that support discard functionality may
 * internally allocate space using units that are bigger than the logical block
 * size. The discard-granularity parameter indicates the size of the internal
 * allocation unit in bytes if reported by the device. Otherwise the
 * discard-granularity will be set to match the device's physical block size.
 * 'discard-secure' - All copies of the discarded sectors (potentially created
 * by garbage collection) must also be erased.  To use this feature, the flag
 * BLKIF_DISCARD_SECURE must be set in the blkif_request_trim.
 */
#define BLKIF_OP_DISCARD           5
/*
 * Recognized if "feature-max-indirect-segments" in present in the backend
 * xenbus info. The "feature-max-indirect-segments" node contains the maximum
 * number of segments allowed by the backend per request. If the node is
 * present, the frontend might use blkif_request_indirect structs in order to
 * issue requests with more than BLKIF_MAX_SEGMENTS_PER_REQUEST (11). The
 * maximum number of indirect segments is fixed by the backend, but the
 * frontend can issue requests with any number of indirect segments as long as
 * it's less than the number provided by the backend. The indirect_grefs field
 * in blkif_request_indirect should be filled by the frontend with the
 * grant references of the pages that are holding the indirect segments.
 * These pages are filled with an array of blkif_request_segment that hold the
 * information about the segments. The number of indirect pages to use is
 * determined by the number of segments an indirect request contains. Every
 * indirect page can contain a maximum of
 * (PAGE_SIZE / sizeof(struct blkif_request_segment)) segments, so to
 * calculate the number of indirect pages to use we have to do
 * ceil(indirect_segments / (PAGE_SIZE / sizeof(struct blkif_request_segment))).
 *
 * If a backend does not recognize BLKIF_OP_INDIRECT, it should *not*
 * create the "feature-max-indirect-segments" node!
 */
#define BLKIF_OP_INDIRECT          6
/*
 * Maximum scatter/gather segments per request.
 * This is carefully chosen so that sizeof(struct blkif_ring) <= PAGE_SIZE.
 * NB. This could be 12 if the ring indexes weren't stored in the same page.
 */
#define BLKIF_MAX_SEGMENTS_PER_REQUEST 11
#define BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST 8
struct blkif_request_segment {
		grant_ref_t gref;        /* reference to I/O buffer frame        */
		/* @first_sect: first sector in frame to transfer (inclusive).   */
		/* @last_sect: last sector in frame to transfer (inclusive).     */
		uint8_t     first_sect, last_sect;
};
struct blkif_request_rw {
	uint8_t        nr_segments;  /* number of segments                   */
	blkif_vdev_t   handle;       /* only for read/write requests         */
#ifndef CONFIG_X86_32
	uint32_t       _pad1;	     /* offsetof(blkif_request,u.rw.id) == 8 */
#endif
	uint64_t       id;           /* private guest value, echoed in resp  */
	blkif_sector_t sector_number;/* start sector idx on disk (r/w only)  */
	struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
} __attribute__((__packed__));
struct blkif_request_discard {
	uint8_t        flag;         /* BLKIF_DISCARD_SECURE or zero.        */
#define BLKIF_DISCARD_SECURE (1<<0)  /* ignored if discard-secure=0          */
	blkif_vdev_t   _pad1;        /* only for read/write requests         */
#ifndef CONFIG_X86_32
	uint32_t       _pad2;        /* offsetof(blkif_req..,u.discard.id)==8*/
#endif
	uint64_t       id;           /* private guest value, echoed in resp  */
	blkif_sector_t sector_number;
	uint64_t       nr_sectors;
	uint8_t        _pad3;
} __attribute__((__packed__));
struct blkif_request_other {
	uint8_t      _pad1;
	blkif_vdev_t _pad2;        /* only for read/write requests         */
#ifndef CONFIG_X86_32
	uint32_t     _pad3;        /* offsetof(blkif_req..,u.other.id)==8*/
#endif
	uint64_t     id;           /* private guest value, echoed in resp  */
} __attribute__((__packed__));
struct blkif_request_indirect {
	uint8_t        indirect_op;
	uint16_t       nr_segments;
#ifndef CONFIG_X86_32
	uint32_t       _pad1;        /* offsetof(blkif_...,u.indirect.id) == 8 */
#endif
	uint64_t       id;
	blkif_sector_t sector_number;
	blkif_vdev_t   handle;
	uint16_t       _pad2;
	grant_ref_t    indirect_grefs[BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST];
#ifndef CONFIG_X86_32
	uint32_t      _pad3;         /* make it 64 byte aligned */
#else
	uint64_t      _pad3;         /* make it 64 byte aligned */
#endif
} __attribute__((__packed__));
struct blkif_request {
	uint8_t        operation;    /* BLKIF_OP_???                         */
	union {
		struct blkif_request_rw rw;
		struct blkif_request_discard discard;
		struct blkif_request_other other;
		struct blkif_request_indirect indirect;
	} u;
} __attribute__((__packed__));
struct blkif_response {
	uint64_t        id;              /* copied from request */
	uint8_t         operation;       /* copied from request */
	int16_t         status;          /* BLKIF_RSP_???       */
};
/*
 * STATUS RETURN CODES.
 */
 /* Operation not supported (only happens on barrier writes). */
#define BLKIF_RSP_EOPNOTSUPP  -2
 /* Operation failed for some unspecified reason (-EIO). */
#define BLKIF_RSP_ERROR       -1
 /* Operation completed successfully. */
#define BLKIF_RSP_OKAY         0
/*
 * Generate blkif ring structures and types.
 */
DEFINE_RING_TYPES(blkif, struct blkif_request, struct blkif_response);
#define VDISK_CDROM        0x1
#define VDISK_REMOVABLE    0x2
#define VDISK_READONLY     0x4
/* Xen-defined major numbers for virtual disks, they look strangely
 * familiar */
#define XEN_IDE0_MAJOR	3
#define XEN_IDE1_MAJOR	22
#define XEN_SCSI_DISK0_MAJOR	8
#define XEN_SCSI_DISK1_MAJOR	65
#define XEN_SCSI_DISK2_MAJOR	66
#define XEN_SCSI_DISK3_MAJOR	67
#define XEN_SCSI_DISK4_MAJOR	68
#define XEN_SCSI_DISK5_MAJOR	69
#define XEN_SCSI_DISK6_MAJOR	70
#define XEN_SCSI_DISK7_MAJOR	71
#define XEN_SCSI_DISK8_MAJOR	128
#define XEN_SCSI_DISK9_MAJOR	129
#define XEN_SCSI_DISK10_MAJOR	130
#define XEN_SCSI_DISK11_MAJOR	131
#define XEN_SCSI_DISK12_MAJOR	132
#define XEN_SCSI_DISK13_MAJOR	133
#define XEN_SCSI_DISK14_MAJOR	134
#define XEN_SCSI_DISK15_MAJOR	135
#endif /* __XEN_PUBLIC_IO_BLKIF_H__ */
 |