Add the rt linux 4.1.3-rt3 as base
[kvmfornfv.git] / kernel / net / netfilter / ipvs / ip_vs_proto_sctp.c
1 #include <linux/kernel.h>
2 #include <linux/ip.h>
3 #include <linux/sctp.h>
4 #include <net/ip.h>
5 #include <net/ip6_checksum.h>
6 #include <linux/netfilter.h>
7 #include <linux/netfilter_ipv4.h>
8 #include <net/sctp/checksum.h>
9 #include <net/ip_vs.h>
10
11 static int
12 sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
13                    int *verdict, struct ip_vs_conn **cpp,
14                    struct ip_vs_iphdr *iph)
15 {
16         struct net *net;
17         struct ip_vs_service *svc;
18         struct netns_ipvs *ipvs;
19         sctp_chunkhdr_t _schunkh, *sch;
20         sctp_sctphdr_t *sh, _sctph;
21
22         sh = skb_header_pointer(skb, iph->len, sizeof(_sctph), &_sctph);
23         if (sh == NULL) {
24                 *verdict = NF_DROP;
25                 return 0;
26         }
27
28         sch = skb_header_pointer(skb, iph->len + sizeof(sctp_sctphdr_t),
29                                  sizeof(_schunkh), &_schunkh);
30         if (sch == NULL) {
31                 *verdict = NF_DROP;
32                 return 0;
33         }
34
35         net = skb_net(skb);
36         ipvs = net_ipvs(net);
37         rcu_read_lock();
38         if ((sch->type == SCTP_CID_INIT || sysctl_sloppy_sctp(ipvs)) &&
39             (svc = ip_vs_service_find(net, af, skb->mark, iph->protocol,
40                                       &iph->daddr, sh->dest))) {
41                 int ignored;
42
43                 if (ip_vs_todrop(ipvs)) {
44                         /*
45                          * It seems that we are very loaded.
46                          * We have to drop this packet :(
47                          */
48                         rcu_read_unlock();
49                         *verdict = NF_DROP;
50                         return 0;
51                 }
52                 /*
53                  * Let the virtual server select a real server for the
54                  * incoming connection, and create a connection entry.
55                  */
56                 *cpp = ip_vs_schedule(svc, skb, pd, &ignored, iph);
57                 if (!*cpp && ignored <= 0) {
58                         if (!ignored)
59                                 *verdict = ip_vs_leave(svc, skb, pd, iph);
60                         else
61                                 *verdict = NF_DROP;
62                         rcu_read_unlock();
63                         return 0;
64                 }
65         }
66         rcu_read_unlock();
67         /* NF_ACCEPT */
68         return 1;
69 }
70
71 static void sctp_nat_csum(struct sk_buff *skb, sctp_sctphdr_t *sctph,
72                           unsigned int sctphoff)
73 {
74         sctph->checksum = sctp_compute_cksum(skb, sctphoff);
75         skb->ip_summed = CHECKSUM_UNNECESSARY;
76 }
77
78 static int
79 sctp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
80                   struct ip_vs_conn *cp, struct ip_vs_iphdr *iph)
81 {
82         sctp_sctphdr_t *sctph;
83         unsigned int sctphoff = iph->len;
84         bool payload_csum = false;
85
86 #ifdef CONFIG_IP_VS_IPV6
87         if (cp->af == AF_INET6 && iph->fragoffs)
88                 return 1;
89 #endif
90
91         /* csum_check requires unshared skb */
92         if (!skb_make_writable(skb, sctphoff + sizeof(*sctph)))
93                 return 0;
94
95         if (unlikely(cp->app != NULL)) {
96                 int ret;
97
98                 /* Some checks before mangling */
99                 if (pp->csum_check && !pp->csum_check(cp->af, skb, pp))
100                         return 0;
101
102                 /* Call application helper if needed */
103                 ret = ip_vs_app_pkt_out(cp, skb);
104                 if (ret == 0)
105                         return 0;
106                 /* ret=2: csum update is needed after payload mangling */
107                 if (ret == 2)
108                         payload_csum = true;
109         }
110
111         sctph = (void *) skb_network_header(skb) + sctphoff;
112
113         /* Only update csum if we really have to */
114         if (sctph->source != cp->vport || payload_csum ||
115             skb->ip_summed == CHECKSUM_PARTIAL) {
116                 sctph->source = cp->vport;
117                 sctp_nat_csum(skb, sctph, sctphoff);
118         } else {
119                 skb->ip_summed = CHECKSUM_UNNECESSARY;
120         }
121
122         return 1;
123 }
124
125 static int
126 sctp_dnat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
127                   struct ip_vs_conn *cp, struct ip_vs_iphdr *iph)
128 {
129         sctp_sctphdr_t *sctph;
130         unsigned int sctphoff = iph->len;
131         bool payload_csum = false;
132
133 #ifdef CONFIG_IP_VS_IPV6
134         if (cp->af == AF_INET6 && iph->fragoffs)
135                 return 1;
136 #endif
137
138         /* csum_check requires unshared skb */
139         if (!skb_make_writable(skb, sctphoff + sizeof(*sctph)))
140                 return 0;
141
142         if (unlikely(cp->app != NULL)) {
143                 int ret;
144
145                 /* Some checks before mangling */
146                 if (pp->csum_check && !pp->csum_check(cp->af, skb, pp))
147                         return 0;
148
149                 /* Call application helper if needed */
150                 ret = ip_vs_app_pkt_in(cp, skb);
151                 if (ret == 0)
152                         return 0;
153                 /* ret=2: csum update is needed after payload mangling */
154                 if (ret == 2)
155                         payload_csum = true;
156         }
157
158         sctph = (void *) skb_network_header(skb) + sctphoff;
159
160         /* Only update csum if we really have to */
161         if (sctph->dest != cp->dport || payload_csum ||
162             (skb->ip_summed == CHECKSUM_PARTIAL &&
163              !(skb_dst(skb)->dev->features & NETIF_F_SCTP_CSUM))) {
164                 sctph->dest = cp->dport;
165                 sctp_nat_csum(skb, sctph, sctphoff);
166         } else if (skb->ip_summed != CHECKSUM_PARTIAL) {
167                 skb->ip_summed = CHECKSUM_UNNECESSARY;
168         }
169
170         return 1;
171 }
172
173 static int
174 sctp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp)
175 {
176         unsigned int sctphoff;
177         struct sctphdr *sh, _sctph;
178         __le32 cmp, val;
179
180 #ifdef CONFIG_IP_VS_IPV6
181         if (af == AF_INET6)
182                 sctphoff = sizeof(struct ipv6hdr);
183         else
184 #endif
185                 sctphoff = ip_hdrlen(skb);
186
187         sh = skb_header_pointer(skb, sctphoff, sizeof(_sctph), &_sctph);
188         if (sh == NULL)
189                 return 0;
190
191         cmp = sh->checksum;
192         val = sctp_compute_cksum(skb, sctphoff);
193
194         if (val != cmp) {
195                 /* CRC failure, dump it. */
196                 IP_VS_DBG_RL_PKT(0, af, pp, skb, 0,
197                                 "Failed checksum for");
198                 return 0;
199         }
200         return 1;
201 }
202
/*
 * Events fed into the IPVS SCTP state machine.  The numeric value of an
 * event is the middle index of sctp_states[direction][event][state], so
 * the order of the enumerators must match the row order of that table.
 */
enum ipvs_sctp_event_t {
	IP_VS_SCTP_DATA,		/* DATA, SACK, HEARTBEATs */
	IP_VS_SCTP_INIT,
	IP_VS_SCTP_INIT_ACK,
	IP_VS_SCTP_COOKIE_ECHO,
	IP_VS_SCTP_COOKIE_ACK,
	IP_VS_SCTP_SHUTDOWN,
	IP_VS_SCTP_SHUTDOWN_ACK,
	IP_VS_SCTP_SHUTDOWN_COMPLETE,
	IP_VS_SCTP_ERROR,
	IP_VS_SCTP_ABORT,
	IP_VS_SCTP_EVENT_LAST		/* number of events; sizes sctp_states */
};
216
217 /* RFC 2960, 3.2 Chunk Field Descriptions */
218 static __u8 sctp_events[] = {
219         [SCTP_CID_DATA]                 = IP_VS_SCTP_DATA,
220         [SCTP_CID_INIT]                 = IP_VS_SCTP_INIT,
221         [SCTP_CID_INIT_ACK]             = IP_VS_SCTP_INIT_ACK,
222         [SCTP_CID_SACK]                 = IP_VS_SCTP_DATA,
223         [SCTP_CID_HEARTBEAT]            = IP_VS_SCTP_DATA,
224         [SCTP_CID_HEARTBEAT_ACK]        = IP_VS_SCTP_DATA,
225         [SCTP_CID_ABORT]                = IP_VS_SCTP_ABORT,
226         [SCTP_CID_SHUTDOWN]             = IP_VS_SCTP_SHUTDOWN,
227         [SCTP_CID_SHUTDOWN_ACK]         = IP_VS_SCTP_SHUTDOWN_ACK,
228         [SCTP_CID_ERROR]                = IP_VS_SCTP_ERROR,
229         [SCTP_CID_COOKIE_ECHO]          = IP_VS_SCTP_COOKIE_ECHO,
230         [SCTP_CID_COOKIE_ACK]           = IP_VS_SCTP_COOKIE_ACK,
231         [SCTP_CID_ECN_ECNE]             = IP_VS_SCTP_DATA,
232         [SCTP_CID_ECN_CWR]              = IP_VS_SCTP_DATA,
233         [SCTP_CID_SHUTDOWN_COMPLETE]    = IP_VS_SCTP_SHUTDOWN_COMPLETE,
234 };
235
236 /* SCTP States:
237  * See RFC 2960, 4. SCTP Association State Diagram
238  *
239  * New states (not in diagram):
240  * - INIT1 state: use shorter timeout for dropped INIT packets
241  * - REJECTED state: use shorter timeout if INIT is rejected with ABORT
242  * - INIT, COOKIE_SENT, COOKIE_REPLIED, COOKIE states: for better debugging
243  *
244  * The states are as seen in real server. In the diagram, INIT1, INIT,
245  * COOKIE_SENT and COOKIE_REPLIED processing happens in CLOSED state.
246  *
247  * States as per packets from client (C) and server (S):
248  *
249  * Setup of client connection:
250  * IP_VS_SCTP_S_INIT1: First C:INIT sent, wait for S:INIT-ACK
251  * IP_VS_SCTP_S_INIT: Next C:INIT sent, wait for S:INIT-ACK
252  * IP_VS_SCTP_S_COOKIE_SENT: S:INIT-ACK sent, wait for C:COOKIE-ECHO
253  * IP_VS_SCTP_S_COOKIE_REPLIED: C:COOKIE-ECHO sent, wait for S:COOKIE-ACK
254  *
255  * Setup of server connection:
256  * IP_VS_SCTP_S_COOKIE_WAIT: S:INIT sent, wait for C:INIT-ACK
257  * IP_VS_SCTP_S_COOKIE: C:INIT-ACK sent, wait for S:COOKIE-ECHO
258  * IP_VS_SCTP_S_COOKIE_ECHOED: S:COOKIE-ECHO sent, wait for C:COOKIE-ACK
259  */
260
261 #define sNO IP_VS_SCTP_S_NONE
262 #define sI1 IP_VS_SCTP_S_INIT1
263 #define sIN IP_VS_SCTP_S_INIT
264 #define sCS IP_VS_SCTP_S_COOKIE_SENT
265 #define sCR IP_VS_SCTP_S_COOKIE_REPLIED
266 #define sCW IP_VS_SCTP_S_COOKIE_WAIT
267 #define sCO IP_VS_SCTP_S_COOKIE
268 #define sCE IP_VS_SCTP_S_COOKIE_ECHOED
269 #define sES IP_VS_SCTP_S_ESTABLISHED
270 #define sSS IP_VS_SCTP_S_SHUTDOWN_SENT
271 #define sSR IP_VS_SCTP_S_SHUTDOWN_RECEIVED
272 #define sSA IP_VS_SCTP_S_SHUTDOWN_ACK_SENT
273 #define sRJ IP_VS_SCTP_S_REJECTED
274 #define sCL IP_VS_SCTP_S_CLOSED
275
276 static const __u8 sctp_states
277         [IP_VS_DIR_LAST][IP_VS_SCTP_EVENT_LAST][IP_VS_SCTP_S_LAST] = {
278         { /* INPUT */
279 /*        sNO, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL*/
280 /* d   */{sES, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL},
281 /* i   */{sI1, sIN, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sIN, sIN},
282 /* i_a */{sCW, sCW, sCW, sCS, sCR, sCO, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL},
283 /* c_e */{sCR, sIN, sIN, sCR, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL},
284 /* c_a */{sES, sI1, sIN, sCS, sCR, sCW, sCO, sES, sES, sSS, sSR, sSA, sRJ, sCL},
285 /* s   */{sSR, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sSR, sSS, sSR, sSA, sRJ, sCL},
286 /* s_a */{sCL, sIN, sIN, sCS, sCR, sCW, sCO, sCE, sES, sCL, sSR, sCL, sRJ, sCL},
287 /* s_c */{sCL, sCL, sCL, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sCL, sRJ, sCL},
288 /* err */{sCL, sI1, sIN, sCS, sCR, sCW, sCO, sCL, sES, sSS, sSR, sSA, sRJ, sCL},
289 /* ab  */{sCL, sCL, sCL, sCL, sCL, sRJ, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL},
290         },
291         { /* OUTPUT */
292 /*        sNO, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL*/
293 /* d   */{sES, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL},
294 /* i   */{sCW, sCW, sCW, sCW, sCW, sCW, sCW, sCW, sES, sCW, sCW, sCW, sCW, sCW},
295 /* i_a */{sCS, sCS, sCS, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL},
296 /* c_e */{sCE, sCE, sCE, sCE, sCE, sCE, sCE, sCE, sES, sSS, sSR, sSA, sRJ, sCL},
297 /* c_a */{sES, sES, sES, sES, sES, sES, sES, sES, sES, sSS, sSR, sSA, sRJ, sCL},
298 /* s   */{sSS, sSS, sSS, sSS, sSS, sSS, sSS, sSS, sSS, sSS, sSR, sSA, sRJ, sCL},
299 /* s_a */{sSA, sSA, sSA, sSA, sSA, sCW, sCO, sCE, sES, sSA, sSA, sSA, sRJ, sCL},
300 /* s_c */{sCL, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL},
301 /* err */{sCL, sCL, sCL, sCL, sCL, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL},
302 /* ab  */{sCL, sRJ, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL},
303         },
304         { /* INPUT-ONLY */
305 /*        sNO, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL*/
306 /* d   */{sES, sI1, sIN, sCS, sCR, sES, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL},
307 /* i   */{sI1, sIN, sIN, sIN, sIN, sIN, sCO, sCE, sES, sSS, sSR, sSA, sIN, sIN},
308 /* i_a */{sCE, sCE, sCE, sCE, sCE, sCE, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL},
309 /* c_e */{sES, sES, sES, sES, sES, sES, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL},
310 /* c_a */{sES, sI1, sIN, sES, sES, sCW, sES, sES, sES, sSS, sSR, sSA, sRJ, sCL},
311 /* s   */{sSR, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sSR, sSS, sSR, sSA, sRJ, sCL},
312 /* s_a */{sCL, sIN, sIN, sCS, sCR, sCW, sCO, sCE, sCL, sCL, sSR, sCL, sRJ, sCL},
313 /* s_c */{sCL, sCL, sCL, sCL, sCL, sCW, sCO, sCE, sES, sSS, sCL, sCL, sRJ, sCL},
314 /* err */{sCL, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL},
315 /* ab  */{sCL, sCL, sCL, sCL, sCL, sRJ, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL},
316         },
317 };
318
319 #define IP_VS_SCTP_MAX_RTO      ((60 + 1) * HZ)
320
321 /* Timeout table[state] */
322 static const int sctp_timeouts[IP_VS_SCTP_S_LAST + 1] = {
323         [IP_VS_SCTP_S_NONE]                     = 2 * HZ,
324         [IP_VS_SCTP_S_INIT1]                    = (0 + 3 + 1) * HZ,
325         [IP_VS_SCTP_S_INIT]                     = IP_VS_SCTP_MAX_RTO,
326         [IP_VS_SCTP_S_COOKIE_SENT]              = IP_VS_SCTP_MAX_RTO,
327         [IP_VS_SCTP_S_COOKIE_REPLIED]           = IP_VS_SCTP_MAX_RTO,
328         [IP_VS_SCTP_S_COOKIE_WAIT]              = IP_VS_SCTP_MAX_RTO,
329         [IP_VS_SCTP_S_COOKIE]                   = IP_VS_SCTP_MAX_RTO,
330         [IP_VS_SCTP_S_COOKIE_ECHOED]            = IP_VS_SCTP_MAX_RTO,
331         [IP_VS_SCTP_S_ESTABLISHED]              = 15 * 60 * HZ,
332         [IP_VS_SCTP_S_SHUTDOWN_SENT]            = IP_VS_SCTP_MAX_RTO,
333         [IP_VS_SCTP_S_SHUTDOWN_RECEIVED]        = IP_VS_SCTP_MAX_RTO,
334         [IP_VS_SCTP_S_SHUTDOWN_ACK_SENT]        = IP_VS_SCTP_MAX_RTO,
335         [IP_VS_SCTP_S_REJECTED]                 = (0 + 3 + 1) * HZ,
336         [IP_VS_SCTP_S_CLOSED]                   = IP_VS_SCTP_MAX_RTO,
337         [IP_VS_SCTP_S_LAST]                     = 2 * HZ,
338 };
339
340 static const char *sctp_state_name_table[IP_VS_SCTP_S_LAST + 1] = {
341         [IP_VS_SCTP_S_NONE]                     = "NONE",
342         [IP_VS_SCTP_S_INIT1]                    = "INIT1",
343         [IP_VS_SCTP_S_INIT]                     = "INIT",
344         [IP_VS_SCTP_S_COOKIE_SENT]              = "C-SENT",
345         [IP_VS_SCTP_S_COOKIE_REPLIED]           = "C-REPLIED",
346         [IP_VS_SCTP_S_COOKIE_WAIT]              = "C-WAIT",
347         [IP_VS_SCTP_S_COOKIE]                   = "COOKIE",
348         [IP_VS_SCTP_S_COOKIE_ECHOED]            = "C-ECHOED",
349         [IP_VS_SCTP_S_ESTABLISHED]              = "ESTABLISHED",
350         [IP_VS_SCTP_S_SHUTDOWN_SENT]            = "S-SENT",
351         [IP_VS_SCTP_S_SHUTDOWN_RECEIVED]        = "S-RECEIVED",
352         [IP_VS_SCTP_S_SHUTDOWN_ACK_SENT]        = "S-ACK-SENT",
353         [IP_VS_SCTP_S_REJECTED]                 = "REJECTED",
354         [IP_VS_SCTP_S_CLOSED]                   = "CLOSED",
355         [IP_VS_SCTP_S_LAST]                     = "BUG!",
356 };
357
358
359 static const char *sctp_state_name(int state)
360 {
361         if (state >= IP_VS_SCTP_S_LAST)
362                 return "ERR!";
363         if (sctp_state_name_table[state])
364                 return sctp_state_name_table[state];
365         return "?";
366 }
367
368 static inline void
369 set_sctp_state(struct ip_vs_proto_data *pd, struct ip_vs_conn *cp,
370                 int direction, const struct sk_buff *skb)
371 {
372         sctp_chunkhdr_t _sctpch, *sch;
373         unsigned char chunk_type;
374         int event, next_state;
375         int ihl, cofs;
376
377 #ifdef CONFIG_IP_VS_IPV6
378         ihl = cp->af == AF_INET ? ip_hdrlen(skb) : sizeof(struct ipv6hdr);
379 #else
380         ihl = ip_hdrlen(skb);
381 #endif
382
383         cofs = ihl + sizeof(sctp_sctphdr_t);
384         sch = skb_header_pointer(skb, cofs, sizeof(_sctpch), &_sctpch);
385         if (sch == NULL)
386                 return;
387
388         chunk_type = sch->type;
389         /*
390          * Section 3: Multiple chunks can be bundled into one SCTP packet
391          * up to the MTU size, except for the INIT, INIT ACK, and
392          * SHUTDOWN COMPLETE chunks. These chunks MUST NOT be bundled with
393          * any other chunk in a packet.
394          *
395          * Section 3.3.7: DATA chunks MUST NOT be bundled with ABORT. Control
396          * chunks (except for INIT, INIT ACK, and SHUTDOWN COMPLETE) MAY be
397          * bundled with an ABORT, but they MUST be placed before the ABORT
398          * in the SCTP packet or they will be ignored by the receiver.
399          */
400         if ((sch->type == SCTP_CID_COOKIE_ECHO) ||
401             (sch->type == SCTP_CID_COOKIE_ACK)) {
402                 int clen = ntohs(sch->length);
403
404                 if (clen >= sizeof(sctp_chunkhdr_t)) {
405                         sch = skb_header_pointer(skb, cofs + ALIGN(clen, 4),
406                                                  sizeof(_sctpch), &_sctpch);
407                         if (sch && sch->type == SCTP_CID_ABORT)
408                                 chunk_type = sch->type;
409                 }
410         }
411
412         event = (chunk_type < sizeof(sctp_events)) ?
413                 sctp_events[chunk_type] : IP_VS_SCTP_DATA;
414
415         /* Update direction to INPUT_ONLY if necessary
416          * or delete NO_OUTPUT flag if output packet detected
417          */
418         if (cp->flags & IP_VS_CONN_F_NOOUTPUT) {
419                 if (direction == IP_VS_DIR_OUTPUT)
420                         cp->flags &= ~IP_VS_CONN_F_NOOUTPUT;
421                 else
422                         direction = IP_VS_DIR_INPUT_ONLY;
423         }
424
425         next_state = sctp_states[direction][event][cp->state];
426
427         if (next_state != cp->state) {
428                 struct ip_vs_dest *dest = cp->dest;
429
430                 IP_VS_DBG_BUF(8, "%s %s  %s:%d->"
431                                 "%s:%d state: %s->%s conn->refcnt:%d\n",
432                                 pd->pp->name,
433                                 ((direction == IP_VS_DIR_OUTPUT) ?
434                                  "output " : "input "),
435                                 IP_VS_DBG_ADDR(cp->daf, &cp->daddr),
436                                 ntohs(cp->dport),
437                                 IP_VS_DBG_ADDR(cp->af, &cp->caddr),
438                                 ntohs(cp->cport),
439                                 sctp_state_name(cp->state),
440                                 sctp_state_name(next_state),
441                                 atomic_read(&cp->refcnt));
442                 if (dest) {
443                         if (!(cp->flags & IP_VS_CONN_F_INACTIVE) &&
444                                 (next_state != IP_VS_SCTP_S_ESTABLISHED)) {
445                                 atomic_dec(&dest->activeconns);
446                                 atomic_inc(&dest->inactconns);
447                                 cp->flags |= IP_VS_CONN_F_INACTIVE;
448                         } else if ((cp->flags & IP_VS_CONN_F_INACTIVE) &&
449                                    (next_state == IP_VS_SCTP_S_ESTABLISHED)) {
450                                 atomic_inc(&dest->activeconns);
451                                 atomic_dec(&dest->inactconns);
452                                 cp->flags &= ~IP_VS_CONN_F_INACTIVE;
453                         }
454                 }
455         }
456         if (likely(pd))
457                 cp->timeout = pd->timeout_table[cp->state = next_state];
458         else    /* What to do ? */
459                 cp->timeout = sctp_timeouts[cp->state = next_state];
460 }
461
462 static void
463 sctp_state_transition(struct ip_vs_conn *cp, int direction,
464                 const struct sk_buff *skb, struct ip_vs_proto_data *pd)
465 {
466         spin_lock_bh(&cp->lock);
467         set_sctp_state(pd, cp, direction, skb);
468         spin_unlock_bh(&cp->lock);
469 }
470
471 static inline __u16 sctp_app_hashkey(__be16 port)
472 {
473         return (((__force u16)port >> SCTP_APP_TAB_BITS) ^ (__force u16)port)
474                 & SCTP_APP_TAB_MASK;
475 }
476
477 static int sctp_register_app(struct net *net, struct ip_vs_app *inc)
478 {
479         struct ip_vs_app *i;
480         __u16 hash;
481         __be16 port = inc->port;
482         int ret = 0;
483         struct netns_ipvs *ipvs = net_ipvs(net);
484         struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_SCTP);
485
486         hash = sctp_app_hashkey(port);
487
488         list_for_each_entry(i, &ipvs->sctp_apps[hash], p_list) {
489                 if (i->port == port) {
490                         ret = -EEXIST;
491                         goto out;
492                 }
493         }
494         list_add_rcu(&inc->p_list, &ipvs->sctp_apps[hash]);
495         atomic_inc(&pd->appcnt);
496 out:
497
498         return ret;
499 }
500
501 static void sctp_unregister_app(struct net *net, struct ip_vs_app *inc)
502 {
503         struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_SCTP);
504
505         atomic_dec(&pd->appcnt);
506         list_del_rcu(&inc->p_list);
507 }
508
509 static int sctp_app_conn_bind(struct ip_vs_conn *cp)
510 {
511         struct netns_ipvs *ipvs = net_ipvs(ip_vs_conn_net(cp));
512         int hash;
513         struct ip_vs_app *inc;
514         int result = 0;
515
516         /* Default binding: bind app only for NAT */
517         if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ)
518                 return 0;
519         /* Lookup application incarnations and bind the right one */
520         hash = sctp_app_hashkey(cp->vport);
521
522         rcu_read_lock();
523         list_for_each_entry_rcu(inc, &ipvs->sctp_apps[hash], p_list) {
524                 if (inc->port == cp->vport) {
525                         if (unlikely(!ip_vs_app_inc_get(inc)))
526                                 break;
527                         rcu_read_unlock();
528
529                         IP_VS_DBG_BUF(9, "%s: Binding conn %s:%u->"
530                                         "%s:%u to app %s on port %u\n",
531                                         __func__,
532                                         IP_VS_DBG_ADDR(cp->af, &cp->caddr),
533                                         ntohs(cp->cport),
534                                         IP_VS_DBG_ADDR(cp->af, &cp->vaddr),
535                                         ntohs(cp->vport),
536                                         inc->name, ntohs(inc->port));
537                         cp->app = inc;
538                         if (inc->init_conn)
539                                 result = inc->init_conn(inc, cp);
540                         goto out;
541                 }
542         }
543         rcu_read_unlock();
544 out:
545         return result;
546 }
547
548 /* ---------------------------------------------
549  *   timeouts is netns related now.
550  * ---------------------------------------------
551  */
552 static int __ip_vs_sctp_init(struct net *net, struct ip_vs_proto_data *pd)
553 {
554         struct netns_ipvs *ipvs = net_ipvs(net);
555
556         ip_vs_init_hash_table(ipvs->sctp_apps, SCTP_APP_TAB_SIZE);
557         pd->timeout_table = ip_vs_create_timeout_table((int *)sctp_timeouts,
558                                                         sizeof(sctp_timeouts));
559         if (!pd->timeout_table)
560                 return -ENOMEM;
561         return 0;
562 }
563
564 static void __ip_vs_sctp_exit(struct net *net, struct ip_vs_proto_data *pd)
565 {
566         kfree(pd->timeout_table);
567 }
568
569 struct ip_vs_protocol ip_vs_protocol_sctp = {
570         .name           = "SCTP",
571         .protocol       = IPPROTO_SCTP,
572         .num_states     = IP_VS_SCTP_S_LAST,
573         .dont_defrag    = 0,
574         .init           = NULL,
575         .exit           = NULL,
576         .init_netns     = __ip_vs_sctp_init,
577         .exit_netns     = __ip_vs_sctp_exit,
578         .register_app   = sctp_register_app,
579         .unregister_app = sctp_unregister_app,
580         .conn_schedule  = sctp_conn_schedule,
581         .conn_in_get    = ip_vs_conn_in_get_proto,
582         .conn_out_get   = ip_vs_conn_out_get_proto,
583         .snat_handler   = sctp_snat_handler,
584         .dnat_handler   = sctp_dnat_handler,
585         .csum_check     = sctp_csum_check,
586         .state_name     = sctp_state_name,
587         .state_transition = sctp_state_transition,
588         .app_conn_bind  = sctp_app_conn_bind,
589         .debug_packet   = ip_vs_tcpudp_debug_packet,
590         .timeout_change = NULL,
591 };