8 * This file defines the iPXE TCP API.
12 FILE_LICENCE ( GPL2_OR_LATER );
14 #include <ipxe/tcpip.h>
20 uint16_t src; /* Source port */
21 uint16_t dest; /* Destination port */
22 uint32_t seq; /* Sequence number */
23 uint32_t ack; /* Acknowledgement number */
24 uint8_t hlen; /* Header length (4), Reserved (4) */
25 uint8_t flags; /* Reserved (2), Flags (6) */
26 uint16_t win; /* Advertised window */
27 uint16_t csum; /* Checksum */
28 uint16_t urg; /* Urgent pointer */
31 /** @defgroup tcpopts TCP options
/** End of TCP options list */
#define TCP_OPTION_END 0

/** TCP no-operation option (usable as padding between options) */
#define TCP_OPTION_NOP 1
41 /** Generic TCP option */
45 } __attribute__ (( packed ));
48 struct tcp_mss_option {
52 } __attribute__ (( packed ));
/** Code for the TCP MSS (maximum segment size) option */
#define TCP_OPTION_MSS 2
57 /** TCP window scale option */
58 struct tcp_window_scale_option {
62 } __attribute__ (( packed ));
64 /** Padded TCP window scale option (used for sending) */
65 struct tcp_window_scale_padded_option {
67 struct tcp_window_scale_option wsopt;
68 } __attribute (( packed ));
/** Code for the TCP window scale option */
#define TCP_OPTION_WS 3
/** Advertised TCP window scale
 *
 * Using a scale factor of 2**9 provides for a maximum window of 32MB
 * (a 16-bit window field shifted left by 9 bits), which is sufficient
 * to allow Gigabit-speed transfers with a 200ms RTT.  The minimum
 * advertised window is 512 bytes, which is still less than a single
 * packet.
 */
#define TCP_RX_WINDOW_SCALE 9
82 /** TCP timestamp option */
83 struct tcp_timestamp_option {
88 } __attribute__ (( packed ));
90 /** Padded TCP timestamp option (used for sending) */
91 struct tcp_timestamp_padded_option {
93 struct tcp_timestamp_option tsopt;
94 } __attribute__ (( packed ));
/** Code for the TCP timestamp option */
#define TCP_OPTION_TS 8
99 /** Parsed TCP options */
101 /** MSS option, if present */
102 const struct tcp_mss_option *mssopt;
103 /** Window scale option, if present */
104 const struct tcp_window_scale_option *wsopt;
105 /** Timestamp option, if present */
106 const struct tcp_timestamp_option *tsopt;
124 * @defgroup tcpstates TCP states
126 * The TCP state is defined by a combination of the flags that have
127 * been sent to the peer, the flags that have been acknowledged by the
128 * peer, and the flags that have been received from the peer.
/** TCP flags that have been sent in outgoing packets
 *
 * TCP_STATE_SENT() places a set of flags into bits 0-7 of a state
 * value; TCP_FLAGS_SENT() extracts them again.
 */
#define TCP_STATE_SENT(flags)	( (flags) << 0 )
#define TCP_FLAGS_SENT(state)	( ( (state) >> 0 ) & 0xff )
/** TCP flags that have been acknowledged by the peer
 *
 * Note that this applies only to SYN and FIN.
 *
 * TCP_STATE_ACKED() places a set of flags into bits 8-15 of a state
 * value; TCP_FLAGS_ACKED() extracts them again.
 */
#define TCP_STATE_ACKED(flags)	( (flags) << 8 )
#define TCP_FLAGS_ACKED(state)	( ( (state) >> 8 ) & 0xff )
/** TCP flags that have been received from the peer
 *
 * Note that this applies only to SYN and FIN, and that once SYN has
 * been received, we should always be sending ACK.
 *
 * TCP_STATE_RCVD() places a set of flags into bits 16-23 of a state
 * value; TCP_FLAGS_RCVD() extracts them again.
 */
#define TCP_STATE_RCVD(flags)	( (flags) << 16 )
#define TCP_FLAGS_RCVD(state)	( ( (state) >> 16 ) & 0xff )
/** TCP flags that are currently being sent in outgoing packets
 *
 * These are the flags that have been sent but not yet acknowledged.
 * Note that the argument is evaluated twice.
 */
#define TCP_FLAGS_SENDING(state)					\
	( TCP_FLAGS_SENT ( state ) & ~TCP_FLAGS_ACKED ( state ) )
/** CLOSED
 *
 * The connection has not yet been used for anything.
 */
#define TCP_CLOSED TCP_RST
164 * Not currently used as a state; we have no support for listening
165 * connections. Given a unique value to avoid compiler warnings.
/** SYN_SENT
 *
 * SYN has been sent, nothing has yet been received or acknowledged.
 */
#define TCP_SYN_SENT	( TCP_STATE_SENT ( TCP_SYN ) )
/** SYN_RCVD
 *
 * SYN has been sent but not acknowledged, SYN has been received.
 */
#define TCP_SYN_RCVD	( TCP_STATE_SENT ( TCP_SYN | TCP_ACK ) |	    \
			  TCP_STATE_RCVD ( TCP_SYN ) )
/** ESTABLISHED
 *
 * SYN has been sent and acknowledged, SYN has been received.
 */
#define TCP_ESTABLISHED	( TCP_STATE_SENT ( TCP_SYN | TCP_ACK ) |	    \
			  TCP_STATE_ACKED ( TCP_SYN ) |			    \
			  TCP_STATE_RCVD ( TCP_SYN ) )
/** FIN_WAIT_1
 *
 * SYN has been sent and acknowledged, SYN has been received, FIN has
 * been sent but not acknowledged, FIN has not been received.
 *
 * RFC 793 shows that we can enter FIN_WAIT_1 without having had SYN
 * acknowledged, i.e. if the application closes the connection after
 * sending and receiving SYN, but before having had SYN acknowledged.
 * However, we have to *pretend* that SYN has been acknowledged
 * anyway, otherwise we end up sending SYN and FIN in the same
 * sequence number slot.  Therefore, when we transition from SYN_RCVD
 * to FIN_WAIT_1, we have to remember to set TCP_STATE_ACKED(TCP_SYN)
 * and increment our sequence number.
 */
#define TCP_FIN_WAIT_1	( TCP_STATE_SENT ( TCP_SYN | TCP_ACK | TCP_FIN ) |  \
			  TCP_STATE_ACKED ( TCP_SYN ) |			    \
			  TCP_STATE_RCVD ( TCP_SYN ) )
/** FIN_WAIT_2
 *
 * SYN has been sent and acknowledged, SYN has been received, FIN has
 * been sent and acknowledged, FIN has not been received.
 */
#define TCP_FIN_WAIT_2	( TCP_STATE_SENT ( TCP_SYN | TCP_ACK | TCP_FIN ) |  \
			  TCP_STATE_ACKED ( TCP_SYN | TCP_FIN ) |	    \
			  TCP_STATE_RCVD ( TCP_SYN ) )
/** CLOSING / LAST_ACK
 *
 * SYN has been sent and acknowledged, SYN has been received, FIN has
 * been sent but not acknowledged, FIN has been received.
 *
 * This state actually encompasses both CLOSING and LAST_ACK; they are
 * identical with the definition of state that we use.  I don't
 * *believe* that they need to be distinguished.
 */
#define TCP_CLOSING_OR_LAST_ACK						    \
			( TCP_STATE_SENT ( TCP_SYN | TCP_ACK | TCP_FIN ) |  \
			  TCP_STATE_ACKED ( TCP_SYN ) |			    \
			  TCP_STATE_RCVD ( TCP_SYN | TCP_FIN ) )
/** TIME_WAIT
 *
 * SYN has been sent and acknowledged, SYN has been received, FIN has
 * been sent and acknowledged, FIN has been received.
 */
#define TCP_TIME_WAIT	( TCP_STATE_SENT ( TCP_SYN | TCP_ACK | TCP_FIN ) |  \
			  TCP_STATE_ACKED ( TCP_SYN | TCP_FIN ) |	    \
			  TCP_STATE_RCVD ( TCP_SYN | TCP_FIN ) )
/** CLOSE_WAIT
 *
 * SYN has been sent and acknowledged, SYN has been received, FIN has
 * not been sent, FIN has been received.
 */
#define TCP_CLOSE_WAIT	( TCP_STATE_SENT ( TCP_SYN | TCP_ACK ) |	    \
			  TCP_STATE_ACKED ( TCP_SYN ) |			    \
			  TCP_STATE_RCVD ( TCP_SYN | TCP_FIN ) )
/** Can send data in current state
 *
 * We can send data if and only if we have had our SYN acked and we
 * have not yet sent our FIN.
 *
 * Evaluates to non-zero when, of the two bits "SYN acked" and "FIN
 * sent", exactly the "SYN acked" bit is set in @c state.
 */
#define TCP_CAN_SEND_DATA(state)					    \
	( ( (state) & ( TCP_STATE_ACKED ( TCP_SYN ) |			    \
			TCP_STATE_SENT ( TCP_FIN ) ) )			    \
	  == TCP_STATE_ACKED ( TCP_SYN ) )
/** Have ever been fully established
 *
 * We have been fully established if we have both received a SYN and
 * had our own SYN acked.
 *
 * Evaluates to non-zero when both the "SYN acked" and "SYN received"
 * bits are set in @c state.
 */
#define TCP_HAS_BEEN_ESTABLISHED(state)					    \
	( ( (state) & ( TCP_STATE_ACKED ( TCP_SYN ) |			    \
			TCP_STATE_RCVD ( TCP_SYN ) ) )			    \
	  == ( TCP_STATE_ACKED ( TCP_SYN ) | TCP_STATE_RCVD ( TCP_SYN ) ) )
/** Have closed gracefully
 *
 * We have closed gracefully if we have both received a FIN and had
 * our own FIN acked.
 *
 * Evaluates to non-zero when both the "FIN acked" and "FIN received"
 * bits are set in @c state.
 */
#define TCP_CLOSED_GRACEFULLY(state)					    \
	( ( (state) & ( TCP_STATE_ACKED ( TCP_FIN ) |			    \
			TCP_STATE_RCVD ( TCP_FIN ) ) )			    \
	  == ( TCP_STATE_ACKED ( TCP_FIN ) | TCP_STATE_RCVD ( TCP_FIN ) ) )
/** Mask for TCP header length field
 *
 * Selects the upper four bits of the byte that holds the header
 * length.
 */
#define TCP_MASK_HLEN	0xf0
/** Smallest port number on which a TCP connection can listen */
#define TCP_MIN_PORT	1
/**
 * Maximum advertised TCP window size
 *
 * The maximum bandwidth on any link is limited by
 *
 *    max_bandwidth * round_trip_time = tcp_window
 *
 * Some rough expectations for achievable bandwidths over various
 * kinds of link are:
 *
 * a) Gigabit LAN: expected bandwidth 125MB/s, typical RTT 0.5ms,
 *    minimum required window 64kB
 *
 * b) Home Internet connection: expected bandwidth 10MB/s, typical
 *    RTT 25ms, minimum required window 256kB
 *
 * c) WAN: expected bandwidth 2MB/s, typical RTT 100ms, minimum
 *    required window 200kB.
 *
 * The maximum possible value for the TCP window size is 1GB (using
 * the maximum window scale of 2**14).  However, it is advisable to
 * keep the window size as small as possible (without limiting
 * bandwidth), since in the event of a lost packet the window size
 * represents the maximum amount that will need to be retransmitted.
 *
 * We therefore choose a maximum window size of 256kB.
 */
#define TCP_MAX_WINDOW_SIZE	( 256 * 1024 )
/**
 * TCP path MTU
 *
 * IPv6 requires all data link layers to support a datagram size of
 * 1280 bytes.  We choose to use this as our maximum transmitted
 * datagram size, on the assumption that any practical link layer we
 * encounter will allow this size.  This is a very conservative
 * assumption in practice, but the impact of making such a
 * conservative assumption is insignificant since the amount of data
 * that we transmit (rather than receive) is negligible.
 *
 * We allow space within this 1280 bytes for an IPv6 header, a TCP
 * header, and a (padded) TCP timestamp option.  The resulting value
 * is therefore what remains of the 1280 bytes after those three
 * items have been subtracted.
 */
#define TCP_PATH_MTU							\
	( 1280 - 40 /* IPv6 */ - 20 /* TCP */ - 12 /* TCP timestamp */ )
/** TCP maximum segment lifetime
 *
 * Currently set to 2 minutes, as per RFC 793.  Expressed in timer
 * ticks (a multiple of TICKS_PER_SEC).
 */
#define TCP_MSL ( 2 * 60 * TICKS_PER_SEC )
/**
 * TCP maximum header length
 *
 * Sum of MAX_LL_NET_HEADER_LEN (defined elsewhere), the TCP header
 * itself, and one each of the MSS option, the padded window scale
 * option and the padded timestamp option.
 */
#define TCP_MAX_HEADER_LEN					\
	( MAX_LL_NET_HEADER_LEN +				\
	  sizeof ( struct tcp_header ) +			\
	  sizeof ( struct tcp_mss_option ) +			\
	  sizeof ( struct tcp_window_scale_padded_option ) +	\
	  sizeof ( struct tcp_timestamp_padded_option ) )
/**
 * Compare TCP sequence numbers
 *
 * @v seq1		Sequence number 1
 * @v seq2		Sequence number 2
 * @ret diff		Sequence difference
 *
 * Analogous to memcmp(), returns an integer less than, equal to, or
 * greater than zero if @c seq1 is found, respectively, to be before,
 * equal to, or after @c seq2.
 *
 * The difference is computed in unsigned 32-bit arithmetic and then
 * reinterpreted as signed, so the comparison remains meaningful when
 * the sequence numbers wrap around.
 */
static inline __attribute__ (( always_inline )) int32_t
tcp_cmp ( uint32_t seq1, uint32_t seq2 ) {
	return ( ( int32_t ) ( seq1 - seq2 ) );
}
367 * Check if TCP sequence number lies within window
369 * @v seq Sequence number
370 * @v start Start of window
371 * @v len Length of window
372 * @ret in_window Sequence number is within window
374 static inline int tcp_in_window ( uint32_t seq, uint32_t start,
376 return ( ( seq - start ) < len );
379 extern struct tcpip_protocol tcp_protocol __tcpip_protocol;
381 #endif /* _IPXE_TCP_H */