1 /******************************************************************************
4 * Unified network-device I/O interface for Xen guest OSes.
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
24 * Copyright (c) 2003-2004, Keir Fraser
27 #ifndef __XEN_PUBLIC_IO_NETIF_H__
28 #define __XEN_PUBLIC_IO_NETIF_H__
33 #include "../grant_table.h"
36 * Older implementations of the Xen network frontend / backend have an
37 * implicit dependency on MAX_SKB_FRAGS as the maximum number of
38 * ring slots an skb can use. Netfront / netback may not work as
39 * expected when frontend and backend have different MAX_SKB_FRAGS values.
41 * A better approach is to add a mechanism for netfront / netback to
42 * negotiate this value. However, we cannot fix all possible
43 * frontends, so we need to define a value which states the minimum
44 * number of slots a backend must support.
46 * The minimum value derives from older Linux kernels' MAX_SKB_FRAGS
47 * (18), which has proved to work with most frontends. Any new backend
48 * which doesn't negotiate with the frontend should expect the frontend to
49 * send a valid packet using up to this many slots.
51 #define XEN_NETIF_NR_SLOTS_MIN 18 /* minimum number of ring slots per packet that a backend must support */
54 * Notifications after enqueuing any type of message should be conditional on
55 * the appropriate req_event or rsp_event field in the shared ring.
56 * If the client sends notification for rx requests then it should specify
57 * feature 'feature-rx-notify' via xenbus. Otherwise the backend will assume
58 * that it cannot safely queue packets (as it may not be kicked to send them).
62 * "feature-split-event-channels" is introduced to separate guest TX
63 * and RX notification. Backend either doesn't support this feature or
64 * advertises it via xenstore as 0 (disabled) or 1 (enabled).
66 * To make use of this feature, frontend should allocate two event
67 * channels for TX and RX, advertise them to backend as
68 * "event-channel-tx" and "event-channel-rx" respectively. If frontend
69 * doesn't want to use this feature, it just writes "event-channel"
74 * Multiple transmit and receive queues:
75 * If supported, the backend will write the key "multi-queue-max-queues" to
76 * the directory for that vif, and set its value to the maximum supported
78 * Frontends that are aware of this feature and wish to use it can write the
79 * key "multi-queue-num-queues", set to the number they wish to use, which
80 * must be greater than zero, and no more than the value reported by the backend
81 * in "multi-queue-max-queues".
83 * Queues replicate the shared rings and event channels.
84 * "feature-split-event-channels" may optionally be used when using
85 * multiple queues, but is not mandatory.
87 * Each queue consists of one shared ring pair, i.e. there must be the same
88 * number of tx and rx rings.
90 * For frontends requesting just one queue, the usual event-channel and
91 * ring-ref keys are written as before, simplifying the backend processing
92 * to avoid distinguishing between a frontend that doesn't understand the
93 * multi-queue feature, and one that does, but requested only one queue.
95 * Frontends requesting two or more queues must not write the toplevel
96 * event-channel (or event-channel-{tx,rx}) and {tx,rx}-ring-ref keys,
97 * instead writing those keys under sub-keys having the name "queue-N" where
98 * N is the integer ID of the queue for which those keys belong. Queues
99 * are indexed from zero. For example, a frontend with two queues and split
100 * event channels must write the following set of queue-related keys:
102 * /local/domain/1/device/vif/0/multi-queue-num-queues = "2"
103 * /local/domain/1/device/vif/0/queue-0 = ""
104 * /local/domain/1/device/vif/0/queue-0/tx-ring-ref = "<ring-ref-tx0>"
105 * /local/domain/1/device/vif/0/queue-0/rx-ring-ref = "<ring-ref-rx0>"
106 * /local/domain/1/device/vif/0/queue-0/event-channel-tx = "<evtchn-tx0>"
107 * /local/domain/1/device/vif/0/queue-0/event-channel-rx = "<evtchn-rx0>"
108 * /local/domain/1/device/vif/0/queue-1 = ""
109 * /local/domain/1/device/vif/0/queue-1/tx-ring-ref = "<ring-ref-tx1>"
110 * /local/domain/1/device/vif/0/queue-1/rx-ring-ref = "<ring-ref-rx1>"
111 * /local/domain/1/device/vif/0/queue-1/event-channel-tx = "<evtchn-tx1>"
112 * /local/domain/1/device/vif/0/queue-1/event-channel-rx = "<evtchn-rx1>"
114 * If there is any inconsistency in the XenStore data, the backend may
115 * choose not to connect any queues, instead treating the request as an
116 * error. This includes scenarios where more (or fewer) queues were
117 * requested than the frontend provided details for.
119 * Mapping of packets to queues is considered to be a function of the
120 * transmitting system (backend or frontend) and is not negotiated
121 * between the two. Guests are free to transmit packets on any queue
122 * they choose, provided it has been set up correctly. Guests must be
123 * prepared to receive packets on any queue they have requested be set up.
127 * "feature-no-csum-offload" should be used to turn IPv4 TCP/UDP checksum
128 * offload off or on. If it is missing then the feature is assumed to be on.
129 * "feature-ipv6-csum-offload" should be used to turn IPv6 TCP/UDP checksum
130 * offload on or off. If it is missing then the feature is assumed to be off.
134 * "feature-gso-tcpv4" and "feature-gso-tcpv6" advertise the capability to
135 * handle large TCP packets (in IPv4 or IPv6 form respectively). Neither
136 * frontends nor backends are assumed to be capable unless the flags are
141 * This is the 'wire' format for packets:
142 * Request 1: netif_tx_request -- NETTXF_* (any flags)
143 * [Request 2: netif_extra_info] (only if request 1 has NETTXF_extra_info)
144 * [Request 3: netif_extra_info] (only if request 2 has XEN_NETIF_EXTRA_FLAG_MORE)
145 * Request 4: netif_tx_request -- NETTXF_more_data
146 * Request 5: netif_tx_request -- NETTXF_more_data
148 * Request N: netif_tx_request -- 0
151 /* Protocol checksum field is blank in the packet (hardware offload)? */
152 #define _NETTXF_csum_blank (0) /* bit index within netif_tx_request.flags */
153 #define NETTXF_csum_blank (1U<<_NETTXF_csum_blank) /* bit mask built from the index above */
155 /* Packet data has been validated against protocol checksum. */
156 #define _NETTXF_data_validated (1) /* bit index */
157 #define NETTXF_data_validated (1U<<_NETTXF_data_validated) /* bit mask */
159 /* Packet continues in the next request descriptor. */
160 #define _NETTXF_more_data (2) /* bit index */
161 #define NETTXF_more_data (1U<<_NETTXF_more_data) /* bit mask */
163 /* Packet to be followed by extra descriptor(s). */
164 #define _NETTXF_extra_info (3) /* bit index */
165 #define NETTXF_extra_info (1U<<_NETTXF_extra_info) /* bit mask */
167 #define XEN_NETIF_MAX_TX_SIZE 0xFFFF /* 0xFFFF is the largest uint16_t value; NOTE(review): presumably bounds netif_tx_request.size -- confirm */
168 struct netif_tx_request {
169 grant_ref_t gref; /* Reference to buffer page */
170 uint16_t offset; /* Offset within buffer page */
171 uint16_t flags; /* NETTXF_* */
172 uint16_t id; /* Echoed in response message. */
173 uint16_t size; /* Packet size in bytes. */
175 typedef struct netif_tx_request netif_tx_request_t;
177 /* Types of netif_extra_info descriptors (values for netif_extra_info.type). */
178 #define XEN_NETIF_EXTRA_TYPE_NONE (0) /* Never used - invalid */
179 #define XEN_NETIF_EXTRA_TYPE_GSO (1) /* u.gso */
180 #define XEN_NETIF_EXTRA_TYPE_MCAST_ADD (2) /* u.mcast */
181 #define XEN_NETIF_EXTRA_TYPE_MCAST_DEL (3) /* u.mcast */
182 #define XEN_NETIF_EXTRA_TYPE_MAX (4) /* one greater than the highest type value defined above */
184 /* netif_extra_info flags (bits of netif_extra_info.flags). */
185 #define _XEN_NETIF_EXTRA_FLAG_MORE (0) /* bit index */
186 #define XEN_NETIF_EXTRA_FLAG_MORE (1U<<_XEN_NETIF_EXTRA_FLAG_MORE) /* bit mask; per the wire-format notes, set when a further extra descriptor follows */
189 #define XEN_NETIF_GSO_TYPE_NONE (0) /* XEN_NETIF_GSO_TYPE_*: values for the GSO 'type' field */
190 #define XEN_NETIF_GSO_TYPE_TCPV4 (1) /* large TCP packet in IPv4 form */
191 #define XEN_NETIF_GSO_TYPE_TCPV6 (2) /* large TCP packet in IPv6 form */
194 * This structure needs to fit within both netif_tx_request and
195 * netif_rx_response for compatibility.
197 struct netif_extra_info {
198 uint8_t type; /* XEN_NETIF_EXTRA_TYPE_* */
199 uint8_t flags; /* XEN_NETIF_EXTRA_FLAG_* */
203 * XEN_NETIF_EXTRA_TYPE_GSO:
207 * Maximum payload size of each segment. For example, for TCP this
208 * is just the path MSS.
213 * GSO type. This determines the protocol of the packet and any
214 * extra features required to segment the packet properly.
216 uint8_t type; /* XEN_NETIF_GSO_TYPE_* */
218 /* Future expansion. */
222 * GSO features. This specifies any extra GSO features required
223 * to process this packet, such as ECN support for TCPv4.
225 uint16_t features; /* XEN_NETIF_GSO_FEAT_* */
229 * XEN_NETIF_EXTRA_TYPE_MCAST_{ADD,DEL}:
230 * Backend advertises availability via 'feature-multicast-control'
231 * xenbus node containing value '1'.
232 * Frontend requests this feature by advertising
233 * 'request-multicast-control' xenbus node containing value '1'.
234 * If multicast control is requested then multicast flooding is
235 * disabled and the frontend must explicitly register its interest
236 * in multicast groups using dummy transmit requests containing
237 * MCAST_{ADD,DEL} extra-info fragments.
240 uint8_t addr[6]; /* Address to add/remove. */
246 typedef struct netif_extra_info netif_extra_info_t;
248 struct netif_tx_response {
250 int16_t status; /* NETIF_RSP_* */
252 typedef struct netif_tx_response netif_tx_response_t;
254 struct netif_rx_request {
255 uint16_t id; /* Echoed in response message. */
256 grant_ref_t gref; /* Reference to incoming granted frame */
258 typedef struct netif_rx_request netif_rx_request_t;
260 /* Packet data has been validated against protocol checksum. */
261 #define _NETRXF_data_validated (0) /* bit index within netif_rx_response.flags; NOTE: NETTXF_data_validated uses bit 1 */
262 #define NETRXF_data_validated (1U<<_NETRXF_data_validated) /* bit mask built from the index above */
264 /* Protocol checksum field is blank in the packet (hardware offload)? */
265 #define _NETRXF_csum_blank (1) /* bit index; NOTE: NETTXF_csum_blank uses bit 0 */
266 #define NETRXF_csum_blank (1U<<_NETRXF_csum_blank) /* bit mask */
268 /* Packet continues in the next request descriptor. */
269 #define _NETRXF_more_data (2) /* bit index */
270 #define NETRXF_more_data (1U<<_NETRXF_more_data) /* bit mask */
272 /* Packet to be followed by extra descriptor(s). */
273 #define _NETRXF_extra_info (3) /* bit index */
274 #define NETRXF_extra_info (1U<<_NETRXF_extra_info) /* bit mask */
276 struct netif_rx_response {
278 uint16_t offset; /* Offset in page of start of received packet */
279 uint16_t flags; /* NETRXF_* */
280 int16_t status; /* -ve: NETIF_RSP_* ; +ve: Rx'ed pkt size. */
282 typedef struct netif_rx_response netif_rx_response_t;
285 * Generate netif ring structures and types.
288 DEFINE_RING_TYPES(netif_tx, struct netif_tx_request, struct netif_tx_response); /* transmit ring: netif_tx_request / netif_tx_response entries; DEFINE_RING_TYPES is not defined in this file -- presumably from the Xen ring header, confirm */
289 DEFINE_RING_TYPES(netif_rx, struct netif_rx_request, struct netif_rx_response); /* receive ring: netif_rx_request / netif_rx_response entries */
291 #define NETIF_RSP_DROPPED -2 /* NETIF_RSP_*: values for the 'status' field of netif_tx_response / netif_rx_response */
292 #define NETIF_RSP_ERROR -1
293 #define NETIF_RSP_OKAY 0
294 /* No response: used for auxiliary requests (e.g., netif_extra_info). */
295 #define NETIF_RSP_NULL 1
302 * c-file-style: "BSD"
305 * indent-tabs-mode: nil