Upgrade to 4.4.50-rt62
[kvmfornfv.git] / kernel / net / dccp / proto.c
1 /*
2  *  net/dccp/proto.c
3  *
4  *  An implementation of the DCCP protocol
5  *  Arnaldo Carvalho de Melo <acme@conectiva.com.br>
6  *
7  *      This program is free software; you can redistribute it and/or modify it
8  *      under the terms of the GNU General Public License version 2 as
9  *      published by the Free Software Foundation.
10  */
11
12 #include <linux/dccp.h>
13 #include <linux/module.h>
14 #include <linux/types.h>
15 #include <linux/sched.h>
16 #include <linux/kernel.h>
17 #include <linux/skbuff.h>
18 #include <linux/netdevice.h>
19 #include <linux/in.h>
20 #include <linux/if_arp.h>
21 #include <linux/init.h>
22 #include <linux/random.h>
23 #include <linux/slab.h>
24 #include <net/checksum.h>
25
26 #include <net/inet_sock.h>
27 #include <net/sock.h>
28 #include <net/xfrm.h>
29
30 #include <asm/ioctls.h>
31 #include <linux/spinlock.h>
32 #include <linux/timer.h>
33 #include <linux/delay.h>
34 #include <linux/poll.h>
35
36 #include "ccid.h"
37 #include "dccp.h"
38 #include "feat.h"
39
/* DCCP MIB statistics (struct dccp_mib); exported to GPL modules. */
DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics) __read_mostly;

EXPORT_SYMBOL_GPL(dccp_statistics);

/* NOTE(review): presumably counts orphaned DCCP sockets - confirm at the users. */
struct percpu_counter dccp_orphan_count;
EXPORT_SYMBOL_GPL(dccp_orphan_count);

/* inet hash-table bookkeeping for DCCP sockets; exported to GPL modules. */
struct inet_hashinfo dccp_hashinfo;
EXPORT_SYMBOL_GPL(dccp_hashinfo);

/*
 * the maximum queue length for tx in packets. 0 is no limit.
 * dccp_init_sock() copies this value into each socket's dccps_tx_qlen.
 */
int sysctl_dccp_tx_qlen __read_mostly = 5;
52
#ifdef CONFIG_IP_DCCP_DEBUG
/*
 * dccp_state_name  -  printable name of a DCCP socket state (debug builds only)
 * @state: state value, normally one of the DCCP_* state constants
 *
 * Returns the matching entry of the table below, or "INVALID STATE!" when
 * @state lies outside 0 .. DCCP_MAX_STATES - 1. Table slots that have no
 * designated initialiser are NULL, so an in-range state without an entry
 * yields NULL.
 */
static const char *dccp_state_name(const int state)
{
	static const char *const dccp_state_names[] = {
	[DCCP_OPEN]		= "OPEN",
	[DCCP_REQUESTING]	= "REQUESTING",
	[DCCP_PARTOPEN]		= "PARTOPEN",
	[DCCP_LISTEN]		= "LISTEN",
	[DCCP_RESPOND]		= "RESPOND",
	[DCCP_CLOSING]		= "CLOSING",
	[DCCP_ACTIVE_CLOSEREQ]	= "CLOSEREQ",
	[DCCP_PASSIVE_CLOSE]	= "PASSIVE_CLOSE",
	[DCCP_PASSIVE_CLOSEREQ]	= "PASSIVE_CLOSEREQ",
	[DCCP_TIME_WAIT]	= "TIME_WAIT",
	[DCCP_CLOSED]		= "CLOSED",
	};

	/*
	 * @state is a signed int: a negative value must be rejected as well,
	 * otherwise it would index memory in front of the table.
	 */
	if (state < 0 || state >= DCCP_MAX_STATES)
		return "INVALID STATE!";

	return dccp_state_names[state];
}
#endif
76
77 void dccp_set_state(struct sock *sk, const int state)
78 {
79         const int oldstate = sk->sk_state;
80
81         dccp_pr_debug("%s(%p)  %s  -->  %s\n", dccp_role(sk), sk,
82                       dccp_state_name(oldstate), dccp_state_name(state));
83         WARN_ON(state == oldstate);
84
85         switch (state) {
86         case DCCP_OPEN:
87                 if (oldstate != DCCP_OPEN)
88                         DCCP_INC_STATS(DCCP_MIB_CURRESTAB);
89                 /* Client retransmits all Confirm options until entering OPEN */
90                 if (oldstate == DCCP_PARTOPEN)
91                         dccp_feat_list_purge(&dccp_sk(sk)->dccps_featneg);
92                 break;
93
94         case DCCP_CLOSED:
95                 if (oldstate == DCCP_OPEN || oldstate == DCCP_ACTIVE_CLOSEREQ ||
96                     oldstate == DCCP_CLOSING)
97                         DCCP_INC_STATS(DCCP_MIB_ESTABRESETS);
98
99                 sk->sk_prot->unhash(sk);
100                 if (inet_csk(sk)->icsk_bind_hash != NULL &&
101                     !(sk->sk_userlocks & SOCK_BINDPORT_LOCK))
102                         inet_put_port(sk);
103                 /* fall through */
104         default:
105                 if (oldstate == DCCP_OPEN)
106                         DCCP_DEC_STATS(DCCP_MIB_CURRESTAB);
107         }
108
109         /* Change state AFTER socket is unhashed to avoid closed
110          * socket sitting in hash tables.
111          */
112         sk->sk_state = state;
113 }
114
115 EXPORT_SYMBOL_GPL(dccp_set_state);
116
117 static void dccp_finish_passive_close(struct sock *sk)
118 {
119         switch (sk->sk_state) {
120         case DCCP_PASSIVE_CLOSE:
121                 /* Node (client or server) has received Close packet. */
122                 dccp_send_reset(sk, DCCP_RESET_CODE_CLOSED);
123                 dccp_set_state(sk, DCCP_CLOSED);
124                 break;
125         case DCCP_PASSIVE_CLOSEREQ:
126                 /*
127                  * Client received CloseReq. We set the `active' flag so that
128                  * dccp_send_close() retransmits the Close as per RFC 4340, 8.3.
129                  */
130                 dccp_send_close(sk, 1);
131                 dccp_set_state(sk, DCCP_CLOSING);
132         }
133 }
134
135 void dccp_done(struct sock *sk)
136 {
137         dccp_set_state(sk, DCCP_CLOSED);
138         dccp_clear_xmit_timers(sk);
139
140         sk->sk_shutdown = SHUTDOWN_MASK;
141
142         if (!sock_flag(sk, SOCK_DEAD))
143                 sk->sk_state_change(sk);
144         else
145                 inet_csk_destroy_sock(sk);
146 }
147
148 EXPORT_SYMBOL_GPL(dccp_done);
149
150 const char *dccp_packet_name(const int type)
151 {
152         static const char *const dccp_packet_names[] = {
153                 [DCCP_PKT_REQUEST]  = "REQUEST",
154                 [DCCP_PKT_RESPONSE] = "RESPONSE",
155                 [DCCP_PKT_DATA]     = "DATA",
156                 [DCCP_PKT_ACK]      = "ACK",
157                 [DCCP_PKT_DATAACK]  = "DATAACK",
158                 [DCCP_PKT_CLOSEREQ] = "CLOSEREQ",
159                 [DCCP_PKT_CLOSE]    = "CLOSE",
160                 [DCCP_PKT_RESET]    = "RESET",
161                 [DCCP_PKT_SYNC]     = "SYNC",
162                 [DCCP_PKT_SYNCACK]  = "SYNCACK",
163         };
164
165         if (type >= DCCP_NR_PKT_TYPES)
166                 return "INVALID";
167         else
168                 return dccp_packet_names[type];
169 }
170
171 EXPORT_SYMBOL_GPL(dccp_packet_name);
172
173 int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized)
174 {
175         struct dccp_sock *dp = dccp_sk(sk);
176         struct inet_connection_sock *icsk = inet_csk(sk);
177
178         icsk->icsk_rto          = DCCP_TIMEOUT_INIT;
179         icsk->icsk_syn_retries  = sysctl_dccp_request_retries;
180         sk->sk_state            = DCCP_CLOSED;
181         sk->sk_write_space      = dccp_write_space;
182         icsk->icsk_sync_mss     = dccp_sync_mss;
183         dp->dccps_mss_cache     = 536;
184         dp->dccps_rate_last     = jiffies;
185         dp->dccps_role          = DCCP_ROLE_UNDEFINED;
186         dp->dccps_service       = DCCP_SERVICE_CODE_IS_ABSENT;
187         dp->dccps_tx_qlen       = sysctl_dccp_tx_qlen;
188
189         dccp_init_xmit_timers(sk);
190
191         INIT_LIST_HEAD(&dp->dccps_featneg);
192         /* control socket doesn't need feat nego */
193         if (likely(ctl_sock_initialized))
194                 return dccp_feat_init(sk);
195         return 0;
196 }
197
198 EXPORT_SYMBOL_GPL(dccp_init_sock);
199
200 void dccp_destroy_sock(struct sock *sk)
201 {
202         struct dccp_sock *dp = dccp_sk(sk);
203
204         /*
205          * DCCP doesn't use sk_write_queue, just sk_send_head
206          * for retransmissions
207          */
208         if (sk->sk_send_head != NULL) {
209                 kfree_skb(sk->sk_send_head);
210                 sk->sk_send_head = NULL;
211         }
212
213         /* Clean up a referenced DCCP bind bucket. */
214         if (inet_csk(sk)->icsk_bind_hash != NULL)
215                 inet_put_port(sk);
216
217         kfree(dp->dccps_service_list);
218         dp->dccps_service_list = NULL;
219
220         if (dp->dccps_hc_rx_ackvec != NULL) {
221                 dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
222                 dp->dccps_hc_rx_ackvec = NULL;
223         }
224         ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
225         ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
226         dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;
227
228         /* clean up feature negotiation state */
229         dccp_feat_list_purge(&dp->dccps_featneg);
230 }
231
232 EXPORT_SYMBOL_GPL(dccp_destroy_sock);
233
234 static inline int dccp_listen_start(struct sock *sk, int backlog)
235 {
236         struct dccp_sock *dp = dccp_sk(sk);
237
238         dp->dccps_role = DCCP_ROLE_LISTEN;
239         /* do not start to listen if feature negotiation setup fails */
240         if (dccp_feat_finalise_settings(dp))
241                 return -EPROTO;
242         return inet_csk_listen_start(sk, backlog);
243 }
244
245 static inline int dccp_need_reset(int state)
246 {
247         return state != DCCP_CLOSED && state != DCCP_LISTEN &&
248                state != DCCP_REQUESTING;
249 }
250
251 int dccp_disconnect(struct sock *sk, int flags)
252 {
253         struct inet_connection_sock *icsk = inet_csk(sk);
254         struct inet_sock *inet = inet_sk(sk);
255         int err = 0;
256         const int old_state = sk->sk_state;
257
258         if (old_state != DCCP_CLOSED)
259                 dccp_set_state(sk, DCCP_CLOSED);
260
261         /*
262          * This corresponds to the ABORT function of RFC793, sec. 3.8
263          * TCP uses a RST segment, DCCP a Reset packet with Code 2, "Aborted".
264          */
265         if (old_state == DCCP_LISTEN) {
266                 inet_csk_listen_stop(sk);
267         } else if (dccp_need_reset(old_state)) {
268                 dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
269                 sk->sk_err = ECONNRESET;
270         } else if (old_state == DCCP_REQUESTING)
271                 sk->sk_err = ECONNRESET;
272
273         dccp_clear_xmit_timers(sk);
274
275         __skb_queue_purge(&sk->sk_receive_queue);
276         __skb_queue_purge(&sk->sk_write_queue);
277         if (sk->sk_send_head != NULL) {
278                 __kfree_skb(sk->sk_send_head);
279                 sk->sk_send_head = NULL;
280         }
281
282         inet->inet_dport = 0;
283
284         if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
285                 inet_reset_saddr(sk);
286
287         sk->sk_shutdown = 0;
288         sock_reset_flag(sk, SOCK_DONE);
289
290         icsk->icsk_backoff = 0;
291         inet_csk_delack_init(sk);
292         __sk_dst_reset(sk);
293
294         WARN_ON(inet->inet_num && !icsk->icsk_bind_hash);
295
296         sk->sk_error_report(sk);
297         return err;
298 }
299
300 EXPORT_SYMBOL_GPL(dccp_disconnect);
301
302 /*
303  *      Wait for a DCCP event.
304  *
305  *      Note that we don't need to lock the socket, as the upper poll layers
306  *      take care of normal races (between the test and the event) and we don't
307  *      go look at any of the socket buffers directly.
308  */
309 unsigned int dccp_poll(struct file *file, struct socket *sock,
310                        poll_table *wait)
311 {
312         unsigned int mask;
313         struct sock *sk = sock->sk;
314
315         sock_poll_wait(file, sk_sleep(sk), wait);
316         if (sk->sk_state == DCCP_LISTEN)
317                 return inet_csk_listen_poll(sk);
318
319         /* Socket is not locked. We are protected from async events
320            by poll logic and correct handling of state changes
321            made by another threads is impossible in any case.
322          */
323
324         mask = 0;
325         if (sk->sk_err)
326                 mask = POLLERR;
327
328         if (sk->sk_shutdown == SHUTDOWN_MASK || sk->sk_state == DCCP_CLOSED)
329                 mask |= POLLHUP;
330         if (sk->sk_shutdown & RCV_SHUTDOWN)
331                 mask |= POLLIN | POLLRDNORM | POLLRDHUP;
332
333         /* Connected? */
334         if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_RESPOND)) {
335                 if (atomic_read(&sk->sk_rmem_alloc) > 0)
336                         mask |= POLLIN | POLLRDNORM;
337
338                 if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
339                         if (sk_stream_is_writeable(sk)) {
340                                 mask |= POLLOUT | POLLWRNORM;
341                         } else {  /* send SIGIO later */
342                                 sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
343                                 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
344
345                                 /* Race breaker. If space is freed after
346                                  * wspace test but before the flags are set,
347                                  * IO signal will be lost.
348                                  */
349                                 if (sk_stream_is_writeable(sk))
350                                         mask |= POLLOUT | POLLWRNORM;
351                         }
352                 }
353         }
354         return mask;
355 }
356
357 EXPORT_SYMBOL_GPL(dccp_poll);
358
359 int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg)
360 {
361         int rc = -ENOTCONN;
362
363         lock_sock(sk);
364
365         if (sk->sk_state == DCCP_LISTEN)
366                 goto out;
367
368         switch (cmd) {
369         case SIOCINQ: {
370                 struct sk_buff *skb;
371                 unsigned long amount = 0;
372
373                 skb = skb_peek(&sk->sk_receive_queue);
374                 if (skb != NULL) {
375                         /*
376                          * We will only return the amount of this packet since
377                          * that is all that will be read.
378                          */
379                         amount = skb->len;
380                 }
381                 rc = put_user(amount, (int __user *)arg);
382         }
383                 break;
384         default:
385                 rc = -ENOIOCTLCMD;
386                 break;
387         }
388 out:
389         release_sock(sk);
390         return rc;
391 }
392
393 EXPORT_SYMBOL_GPL(dccp_ioctl);
394
395 static int dccp_setsockopt_service(struct sock *sk, const __be32 service,
396                                    char __user *optval, unsigned int optlen)
397 {
398         struct dccp_sock *dp = dccp_sk(sk);
399         struct dccp_service_list *sl = NULL;
400
401         if (service == DCCP_SERVICE_INVALID_VALUE ||
402             optlen > DCCP_SERVICE_LIST_MAX_LEN * sizeof(u32))
403                 return -EINVAL;
404
405         if (optlen > sizeof(service)) {
406                 sl = kmalloc(optlen, GFP_KERNEL);
407                 if (sl == NULL)
408                         return -ENOMEM;
409
410                 sl->dccpsl_nr = optlen / sizeof(u32) - 1;
411                 if (copy_from_user(sl->dccpsl_list,
412                                    optval + sizeof(service),
413                                    optlen - sizeof(service)) ||
414                     dccp_list_has_service(sl, DCCP_SERVICE_INVALID_VALUE)) {
415                         kfree(sl);
416                         return -EFAULT;
417                 }
418         }
419
420         lock_sock(sk);
421         dp->dccps_service = service;
422
423         kfree(dp->dccps_service_list);
424
425         dp->dccps_service_list = sl;
426         release_sock(sk);
427         return 0;
428 }
429
430 static int dccp_setsockopt_cscov(struct sock *sk, int cscov, bool rx)
431 {
432         u8 *list, len;
433         int i, rc;
434
435         if (cscov < 0 || cscov > 15)
436                 return -EINVAL;
437         /*
438          * Populate a list of permissible values, in the range cscov...15. This
439          * is necessary since feature negotiation of single values only works if
440          * both sides incidentally choose the same value. Since the list starts
441          * lowest-value first, negotiation will pick the smallest shared value.
442          */
443         if (cscov == 0)
444                 return 0;
445         len = 16 - cscov;
446
447         list = kmalloc(len, GFP_KERNEL);
448         if (list == NULL)
449                 return -ENOBUFS;
450
451         for (i = 0; i < len; i++)
452                 list[i] = cscov++;
453
454         rc = dccp_feat_register_sp(sk, DCCPF_MIN_CSUM_COVER, rx, list, len);
455
456         if (rc == 0) {
457                 if (rx)
458                         dccp_sk(sk)->dccps_pcrlen = cscov;
459                 else
460                         dccp_sk(sk)->dccps_pcslen = cscov;
461         }
462         kfree(list);
463         return rc;
464 }
465
466 static int dccp_setsockopt_ccid(struct sock *sk, int type,
467                                 char __user *optval, unsigned int optlen)
468 {
469         u8 *val;
470         int rc = 0;
471
472         if (optlen < 1 || optlen > DCCP_FEAT_MAX_SP_VALS)
473                 return -EINVAL;
474
475         val = memdup_user(optval, optlen);
476         if (IS_ERR(val))
477                 return PTR_ERR(val);
478
479         lock_sock(sk);
480         if (type == DCCP_SOCKOPT_TX_CCID || type == DCCP_SOCKOPT_CCID)
481                 rc = dccp_feat_register_sp(sk, DCCPF_CCID, 1, val, optlen);
482
483         if (!rc && (type == DCCP_SOCKOPT_RX_CCID || type == DCCP_SOCKOPT_CCID))
484                 rc = dccp_feat_register_sp(sk, DCCPF_CCID, 0, val, optlen);
485         release_sock(sk);
486
487         kfree(val);
488         return rc;
489 }
490
491 static int do_dccp_setsockopt(struct sock *sk, int level, int optname,
492                 char __user *optval, unsigned int optlen)
493 {
494         struct dccp_sock *dp = dccp_sk(sk);
495         int val, err = 0;
496
497         switch (optname) {
498         case DCCP_SOCKOPT_PACKET_SIZE:
499                 DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
500                 return 0;
501         case DCCP_SOCKOPT_CHANGE_L:
502         case DCCP_SOCKOPT_CHANGE_R:
503                 DCCP_WARN("sockopt(CHANGE_L/R) is deprecated: fix your app\n");
504                 return 0;
505         case DCCP_SOCKOPT_CCID:
506         case DCCP_SOCKOPT_RX_CCID:
507         case DCCP_SOCKOPT_TX_CCID:
508                 return dccp_setsockopt_ccid(sk, optname, optval, optlen);
509         }
510
511         if (optlen < (int)sizeof(int))
512                 return -EINVAL;
513
514         if (get_user(val, (int __user *)optval))
515                 return -EFAULT;
516
517         if (optname == DCCP_SOCKOPT_SERVICE)
518                 return dccp_setsockopt_service(sk, val, optval, optlen);
519
520         lock_sock(sk);
521         switch (optname) {
522         case DCCP_SOCKOPT_SERVER_TIMEWAIT:
523                 if (dp->dccps_role != DCCP_ROLE_SERVER)
524                         err = -EOPNOTSUPP;
525                 else
526                         dp->dccps_server_timewait = (val != 0);
527                 break;
528         case DCCP_SOCKOPT_SEND_CSCOV:
529                 err = dccp_setsockopt_cscov(sk, val, false);
530                 break;
531         case DCCP_SOCKOPT_RECV_CSCOV:
532                 err = dccp_setsockopt_cscov(sk, val, true);
533                 break;
534         case DCCP_SOCKOPT_QPOLICY_ID:
535                 if (sk->sk_state != DCCP_CLOSED)
536                         err = -EISCONN;
537                 else if (val < 0 || val >= DCCPQ_POLICY_MAX)
538                         err = -EINVAL;
539                 else
540                         dp->dccps_qpolicy = val;
541                 break;
542         case DCCP_SOCKOPT_QPOLICY_TXQLEN:
543                 if (val < 0)
544                         err = -EINVAL;
545                 else
546                         dp->dccps_tx_qlen = val;
547                 break;
548         default:
549                 err = -ENOPROTOOPT;
550                 break;
551         }
552         release_sock(sk);
553
554         return err;
555 }
556
557 int dccp_setsockopt(struct sock *sk, int level, int optname,
558                     char __user *optval, unsigned int optlen)
559 {
560         if (level != SOL_DCCP)
561                 return inet_csk(sk)->icsk_af_ops->setsockopt(sk, level,
562                                                              optname, optval,
563                                                              optlen);
564         return do_dccp_setsockopt(sk, level, optname, optval, optlen);
565 }
566
567 EXPORT_SYMBOL_GPL(dccp_setsockopt);
568
#ifdef CONFIG_COMPAT
/*
 * compat_dccp_setsockopt  -  CONFIG_COMPAT variant of dccp_setsockopt()
 *
 * SOL_DCCP requests are handled by do_dccp_setsockopt(); every other level
 * is passed to inet_csk_compat_setsockopt().
 */
int compat_dccp_setsockopt(struct sock *sk, int level, int optname,
			   char __user *optval, unsigned int optlen)
{
	if (level == SOL_DCCP)
		return do_dccp_setsockopt(sk, level, optname, optval, optlen);

	return inet_csk_compat_setsockopt(sk, level, optname, optval, optlen);
}

EXPORT_SYMBOL_GPL(compat_dccp_setsockopt);
#endif
581
582 static int dccp_getsockopt_service(struct sock *sk, int len,
583                                    __be32 __user *optval,
584                                    int __user *optlen)
585 {
586         const struct dccp_sock *dp = dccp_sk(sk);
587         const struct dccp_service_list *sl;
588         int err = -ENOENT, slen = 0, total_len = sizeof(u32);
589
590         lock_sock(sk);
591         if ((sl = dp->dccps_service_list) != NULL) {
592                 slen = sl->dccpsl_nr * sizeof(u32);
593                 total_len += slen;
594         }
595
596         err = -EINVAL;
597         if (total_len > len)
598                 goto out;
599
600         err = 0;
601         if (put_user(total_len, optlen) ||
602             put_user(dp->dccps_service, optval) ||
603             (sl != NULL && copy_to_user(optval + 1, sl->dccpsl_list, slen)))
604                 err = -EFAULT;
605 out:
606         release_sock(sk);
607         return err;
608 }
609
610 static int do_dccp_getsockopt(struct sock *sk, int level, int optname,
611                     char __user *optval, int __user *optlen)
612 {
613         struct dccp_sock *dp;
614         int val, len;
615
616         if (get_user(len, optlen))
617                 return -EFAULT;
618
619         if (len < (int)sizeof(int))
620                 return -EINVAL;
621
622         dp = dccp_sk(sk);
623
624         switch (optname) {
625         case DCCP_SOCKOPT_PACKET_SIZE:
626                 DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
627                 return 0;
628         case DCCP_SOCKOPT_SERVICE:
629                 return dccp_getsockopt_service(sk, len,
630                                                (__be32 __user *)optval, optlen);
631         case DCCP_SOCKOPT_GET_CUR_MPS:
632                 val = dp->dccps_mss_cache;
633                 break;
634         case DCCP_SOCKOPT_AVAILABLE_CCIDS:
635                 return ccid_getsockopt_builtin_ccids(sk, len, optval, optlen);
636         case DCCP_SOCKOPT_TX_CCID:
637                 val = ccid_get_current_tx_ccid(dp);
638                 if (val < 0)
639                         return -ENOPROTOOPT;
640                 break;
641         case DCCP_SOCKOPT_RX_CCID:
642                 val = ccid_get_current_rx_ccid(dp);
643                 if (val < 0)
644                         return -ENOPROTOOPT;
645                 break;
646         case DCCP_SOCKOPT_SERVER_TIMEWAIT:
647                 val = dp->dccps_server_timewait;
648                 break;
649         case DCCP_SOCKOPT_SEND_CSCOV:
650                 val = dp->dccps_pcslen;
651                 break;
652         case DCCP_SOCKOPT_RECV_CSCOV:
653                 val = dp->dccps_pcrlen;
654                 break;
655         case DCCP_SOCKOPT_QPOLICY_ID:
656                 val = dp->dccps_qpolicy;
657                 break;
658         case DCCP_SOCKOPT_QPOLICY_TXQLEN:
659                 val = dp->dccps_tx_qlen;
660                 break;
661         case 128 ... 191:
662                 return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname,
663                                              len, (u32 __user *)optval, optlen);
664         case 192 ... 255:
665                 return ccid_hc_tx_getsockopt(dp->dccps_hc_tx_ccid, sk, optname,
666                                              len, (u32 __user *)optval, optlen);
667         default:
668                 return -ENOPROTOOPT;
669         }
670
671         len = sizeof(val);
672         if (put_user(len, optlen) || copy_to_user(optval, &val, len))
673                 return -EFAULT;
674
675         return 0;
676 }
677
678 int dccp_getsockopt(struct sock *sk, int level, int optname,
679                     char __user *optval, int __user *optlen)
680 {
681         if (level != SOL_DCCP)
682                 return inet_csk(sk)->icsk_af_ops->getsockopt(sk, level,
683                                                              optname, optval,
684                                                              optlen);
685         return do_dccp_getsockopt(sk, level, optname, optval, optlen);
686 }
687
688 EXPORT_SYMBOL_GPL(dccp_getsockopt);
689
#ifdef CONFIG_COMPAT
/*
 * compat_dccp_getsockopt  -  CONFIG_COMPAT variant of dccp_getsockopt()
 *
 * SOL_DCCP requests are handled by do_dccp_getsockopt(); every other level
 * is passed to inet_csk_compat_getsockopt().
 */
int compat_dccp_getsockopt(struct sock *sk, int level, int optname,
			   char __user *optval, int __user *optlen)
{
	if (level == SOL_DCCP)
		return do_dccp_getsockopt(sk, level, optname, optval, optlen);

	return inet_csk_compat_getsockopt(sk, level, optname, optval, optlen);
}

EXPORT_SYMBOL_GPL(compat_dccp_getsockopt);
#endif
702
703 static int dccp_msghdr_parse(struct msghdr *msg, struct sk_buff *skb)
704 {
705         struct cmsghdr *cmsg;
706
707         /*
708          * Assign an (opaque) qpolicy priority value to skb->priority.
709          *
710          * We are overloading this skb field for use with the qpolicy subystem.
711          * The skb->priority is normally used for the SO_PRIORITY option, which
712          * is initialised from sk_priority. Since the assignment of sk_priority
713          * to skb->priority happens later (on layer 3), we overload this field
714          * for use with queueing priorities as long as the skb is on layer 4.
715          * The default priority value (if nothing is set) is 0.
716          */
717         skb->priority = 0;
718
719         for_each_cmsghdr(cmsg, msg) {
720                 if (!CMSG_OK(msg, cmsg))
721                         return -EINVAL;
722
723                 if (cmsg->cmsg_level != SOL_DCCP)
724                         continue;
725
726                 if (cmsg->cmsg_type <= DCCP_SCM_QPOLICY_MAX &&
727                     !dccp_qpolicy_param_ok(skb->sk, cmsg->cmsg_type))
728                         return -EINVAL;
729
730                 switch (cmsg->cmsg_type) {
731                 case DCCP_SCM_PRIORITY:
732                         if (cmsg->cmsg_len != CMSG_LEN(sizeof(__u32)))
733                                 return -EINVAL;
734                         skb->priority = *(__u32 *)CMSG_DATA(cmsg);
735                         break;
736                 default:
737                         return -EINVAL;
738                 }
739         }
740         return 0;
741 }
742
743 int dccp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
744 {
745         const struct dccp_sock *dp = dccp_sk(sk);
746         const int flags = msg->msg_flags;
747         const int noblock = flags & MSG_DONTWAIT;
748         struct sk_buff *skb;
749         int rc, size;
750         long timeo;
751
752         if (len > dp->dccps_mss_cache)
753                 return -EMSGSIZE;
754
755         lock_sock(sk);
756
757         if (dccp_qpolicy_full(sk)) {
758                 rc = -EAGAIN;
759                 goto out_release;
760         }
761
762         timeo = sock_sndtimeo(sk, noblock);
763
764         /*
765          * We have to use sk_stream_wait_connect here to set sk_write_pending,
766          * so that the trick in dccp_rcv_request_sent_state_process.
767          */
768         /* Wait for a connection to finish. */
769         if ((1 << sk->sk_state) & ~(DCCPF_OPEN | DCCPF_PARTOPEN))
770                 if ((rc = sk_stream_wait_connect(sk, &timeo)) != 0)
771                         goto out_release;
772
773         size = sk->sk_prot->max_header + len;
774         release_sock(sk);
775         skb = sock_alloc_send_skb(sk, size, noblock, &rc);
776         lock_sock(sk);
777         if (skb == NULL)
778                 goto out_release;
779
780         skb_reserve(skb, sk->sk_prot->max_header);
781         rc = memcpy_from_msg(skb_put(skb, len), msg, len);
782         if (rc != 0)
783                 goto out_discard;
784
785         rc = dccp_msghdr_parse(msg, skb);
786         if (rc != 0)
787                 goto out_discard;
788
789         dccp_qpolicy_push(sk, skb);
790         /*
791          * The xmit_timer is set if the TX CCID is rate-based and will expire
792          * when congestion control permits to release further packets into the
793          * network. Window-based CCIDs do not use this timer.
794          */
795         if (!timer_pending(&dp->dccps_xmit_timer))
796                 dccp_write_xmit(sk);
797 out_release:
798         release_sock(sk);
799         return rc ? : len;
800 out_discard:
801         kfree_skb(skb);
802         goto out_release;
803 }
804
805 EXPORT_SYMBOL_GPL(dccp_sendmsg);
806
/*
 * dccp_recvmsg  -  receive one datagram from a DCCP socket
 * @sk:       socket
 * @msg:      destination message; MSG_TRUNC is set in msg_flags when the
 *            packet is longer than @len
 * @len:      size of the caller's buffer in bytes
 * @nonblock: passed to sock_rcvtimeo() to select the receive timeout
 * @flags:    MSG_PEEK leaves the packet queued; MSG_TRUNC makes the return
 *            value the full packet length
 * @addr_len: not used by this function
 *
 * Returns the number of bytes received, 0 at end of connection, or a
 * negative error. Note that @len is a size_t: error codes are stored in it
 * and turn back into negative values through the int return type.
 */
int dccp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
		 int flags, int *addr_len)
{
	const struct dccp_hdr *dh;
	long timeo;

	lock_sock(sk);

	/* A listening socket carries no data. */
	if (sk->sk_state == DCCP_LISTEN) {
		len = -ENOTCONN;
		goto out;
	}

	timeo = sock_rcvtimeo(sk, nonblock);

	do {
		/* Look at the head of the receive queue without dequeuing it. */
		struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);

		if (skb == NULL)
			goto verify_sock_status;

		dh = dccp_hdr(skb);

		switch (dh->dccph_type) {
		case DCCP_PKT_DATA:
		case DCCP_PKT_DATAACK:
			goto found_ok_skb;

		case DCCP_PKT_CLOSE:
		case DCCP_PKT_CLOSEREQ:
			/* A peek must not advance the closing handshake. */
			if (!(flags & MSG_PEEK))
				dccp_finish_passive_close(sk);
			/* fall through */
		case DCCP_PKT_RESET:
			dccp_pr_debug("found fin (%s) ok!\n",
				      dccp_packet_name(dh->dccph_type));
			/* Report end of data: 0 bytes. */
			len = 0;
			goto found_fin_ok;
		default:
			/* Any other packet type is dropped from the queue. */
			dccp_pr_debug("packet_type=%s\n",
				      dccp_packet_name(dh->dccph_type));
			sk_eat_skb(sk, skb);
		}
		/*
		 * Nothing deliverable was found: work out whether to return
		 * (and with which result) or to wait for more data.
		 */
verify_sock_status:
		if (sock_flag(sk, SOCK_DONE)) {
			len = 0;
			break;
		}

		if (sk->sk_err) {
			len = sock_error(sk);
			break;
		}

		if (sk->sk_shutdown & RCV_SHUTDOWN) {
			len = 0;
			break;
		}

		if (sk->sk_state == DCCP_CLOSED) {
			/* SOCK_DONE has already been tested a few lines up. */
			if (!sock_flag(sk, SOCK_DONE)) {
				/* This occurs when user tries to read
				 * from never connected socket.
				 */
				len = -ENOTCONN;
				break;
			}
			len = 0;
			break;
		}

		/* Zero timeout: do not wait. */
		if (!timeo) {
			len = -EAGAIN;
			break;
		}

		if (signal_pending(current)) {
			len = sock_intr_errno(timeo);
			break;
		}

		/* Wait for data, then inspect the queue again. */
		sk_wait_data(sk, &timeo, NULL);
		continue;
	found_ok_skb:
		/* Copy at most one packet; flag truncation if it does not fit. */
		if (len > skb->len)
			len = skb->len;
		else if (len < skb->len)
			msg->msg_flags |= MSG_TRUNC;

		if (skb_copy_datagram_msg(skb, 0, msg, len)) {
			/* Exception. Bailout! */
			len = -EFAULT;
			break;
		}
		/* With MSG_TRUNC the caller gets the real packet length. */
		if (flags & MSG_TRUNC)
			len = skb->len;
	found_fin_ok:
		/* The packet stays queued when the caller is only peeking. */
		if (!(flags & MSG_PEEK))
			sk_eat_skb(sk, skb);
		break;
	} while (1);
out:
	release_sock(sk);
	return len;
}

EXPORT_SYMBOL_GPL(dccp_recvmsg);
914
915 int inet_dccp_listen(struct socket *sock, int backlog)
916 {
917         struct sock *sk = sock->sk;
918         unsigned char old_state;
919         int err;
920
921         lock_sock(sk);
922
923         err = -EINVAL;
924         if (sock->state != SS_UNCONNECTED || sock->type != SOCK_DCCP)
925                 goto out;
926
927         old_state = sk->sk_state;
928         if (!((1 << old_state) & (DCCPF_CLOSED | DCCPF_LISTEN)))
929                 goto out;
930
931         /* Really, if the socket is already in listen state
932          * we can only allow the backlog to be adjusted.
933          */
934         if (old_state != DCCP_LISTEN) {
935                 /*
936                  * FIXME: here it probably should be sk->sk_prot->listen_start
937                  * see tcp_listen_start
938                  */
939                 err = dccp_listen_start(sk, backlog);
940                 if (err)
941                         goto out;
942         }
943         sk->sk_max_ack_backlog = backlog;
944         err = 0;
945
946 out:
947         release_sock(sk);
948         return err;
949 }
950
951 EXPORT_SYMBOL_GPL(inet_dccp_listen);
952
953 static void dccp_terminate_connection(struct sock *sk)
954 {
955         u8 next_state = DCCP_CLOSED;
956
957         switch (sk->sk_state) {
958         case DCCP_PASSIVE_CLOSE:
959         case DCCP_PASSIVE_CLOSEREQ:
960                 dccp_finish_passive_close(sk);
961                 break;
962         case DCCP_PARTOPEN:
963                 dccp_pr_debug("Stop PARTOPEN timer (%p)\n", sk);
964                 inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
965                 /* fall through */
966         case DCCP_OPEN:
967                 dccp_send_close(sk, 1);
968
969                 if (dccp_sk(sk)->dccps_role == DCCP_ROLE_SERVER &&
970                     !dccp_sk(sk)->dccps_server_timewait)
971                         next_state = DCCP_ACTIVE_CLOSEREQ;
972                 else
973                         next_state = DCCP_CLOSING;
974                 /* fall through */
975         default:
976                 dccp_set_state(sk, next_state);
977         }
978 }
979
/*
 * dccp_close - close a DCCP socket
 * @sk: socket being closed
 * @timeout: handed by address to dccp_flush_write_queue() on the normal
 *           termination path and then by value to sk_stream_wait_close()
 *
 * With the socket lock held this sets sk_shutdown to SHUTDOWN_MASK and then
 * either stops a listener, or drains the receive queue and picks one of
 * three exits: Reset with DCCP_RESET_CODE_ABORTED if unread data was
 * dropped, ->disconnect() for a zero linger time, or the regular
 * flush-and-terminate sequence.  The socket is then orphaned and, if it is
 * in DCCP_CLOSED, destroyed with inet_csk_destroy_sock().
 */
void dccp_close(struct sock *sk, long timeout)
{
        struct dccp_sock *dp = dccp_sk(sk);
        struct sk_buff *skb;
        u32 data_was_unread = 0;
        int state;

        lock_sock(sk);

        sk->sk_shutdown = SHUTDOWN_MASK;

        if (sk->sk_state == DCCP_LISTEN) {
                dccp_set_state(sk, DCCP_CLOSED);

                /* Special case. */
                inet_csk_listen_stop(sk);

                goto adjudge_to_death;
        }

        sk_stop_timer(sk, &dp->dccps_xmit_timer);

        /*
         * We need to flush the recv. buffs.  We do this only on the
         * descriptor close, not protocol-sourced closes, because the
         * reader process may not have drained the data yet!
         */
        while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
                /* Accumulate the dropped byte count for the abort check. */
                data_was_unread += skb->len;
                __kfree_skb(skb);
        }

        /* If socket has been already reset kill it. */
        if (sk->sk_state == DCCP_CLOSED)
                goto adjudge_to_death;

        if (data_was_unread) {
                /* Unread data was tossed, send an appropriate Reset Code */
                DCCP_WARN("ABORT with %u bytes unread\n", data_was_unread);
                dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
                dccp_set_state(sk, DCCP_CLOSED);
        } else if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
                /* Check zero linger _after_ checking for unread data. */
                sk->sk_prot->disconnect(sk, 0);
        } else if (sk->sk_state != DCCP_CLOSED) {
                /*
                 * Normal connection termination. May need to wait if there are
                 * still packets in the TX queue that are delayed by the CCID.
                 */
                dccp_flush_write_queue(sk, &timeout);
                dccp_terminate_connection(sk);
        }

        /*
         * Flush write queue. This may be necessary in several cases:
         * - we have been closed by the peer but still have application data;
         * - abortive termination (unread data or zero linger time),
         * - normal termination but queue could not be flushed within time limit
         */
        __skb_queue_purge(&sk->sk_write_queue);

        sk_stream_wait_close(sk, timeout);

adjudge_to_death:
        /*
         * Remember the state seen while still holding the socket lock; it is
         * compared with sk->sk_state again after the lock has been dropped.
         */
        state = sk->sk_state;
        /* Extra reference, dropped by the sock_put() at the end. */
        sock_hold(sk);
        sock_orphan(sk);

        /*
         * It is the last release_sock in its life. It will remove backlog.
         */
        release_sock(sk);
        /*
         * Now socket is owned by kernel and we acquire BH lock
         * to finish close. No need to check for user refs.
         */
        local_bh_disable();
        bh_lock_sock(sk);
        WARN_ON(sock_owned_by_user(sk));

        percpu_counter_inc(sk->sk_prot->orphan_count);

        /* Have we already been destroyed by a softirq or backlog? */
        if (state != DCCP_CLOSED && sk->sk_state == DCCP_CLOSED)
                goto out;

        if (sk->sk_state == DCCP_CLOSED)
                inet_csk_destroy_sock(sk);

        /* Otherwise, socket is reprieved until protocol close. */

out:
        bh_unlock_sock(sk);
        local_bh_enable();
        sock_put(sk);
}

EXPORT_SYMBOL_GPL(dccp_close);
1078
/*
 * dccp_shutdown - shutdown hook for DCCP sockets
 * @sk: socket the call was made on (not touched here)
 * @how: shutdown mode; only printed in the debug message
 *
 * The body consists solely of a debug print; no socket state is modified
 * in this function.
 */
void dccp_shutdown(struct sock *sk, int how)
{
        dccp_pr_debug("called shutdown(%x)\n", how);
}

EXPORT_SYMBOL_GPL(dccp_shutdown);
1085
1086 static inline int __init dccp_mib_init(void)
1087 {
1088         dccp_statistics = alloc_percpu(struct dccp_mib);
1089         if (!dccp_statistics)
1090                 return -ENOMEM;
1091         return 0;
1092 }
1093
/* Release the per-CPU MIB counters allocated by dccp_mib_init(). */
static inline void dccp_mib_exit(void)
{
        free_percpu(dccp_statistics);
}
1098
/*
 * Optional override for the size of the established-connections hash.
 * When non-zero, dccp_init() derives its allocation goal from this value
 * instead of from totalram_pages.  Exposed with mode 0444.
 */
static int thash_entries;
module_param(thash_entries, int, 0444);
MODULE_PARM_DESC(thash_entries, "Number of ehash buckets");
1102
#ifdef CONFIG_IP_DCCP_DEBUG
/*
 * Debug-message switch, only built with CONFIG_IP_DCCP_DEBUG.  Exposed as a
 * module parameter with mode 0644 and exported for use by other modules.
 */
bool dccp_debug;
module_param(dccp_debug, bool, 0644);
MODULE_PARM_DESC(dccp_debug, "Enable debug messages");

EXPORT_SYMBOL_GPL(dccp_debug);
#endif
1110
/*
 * dccp_init - module initialisation
 *
 * Sets up, in this order: the orphan counter, the bind-bucket slab cache,
 * the established hash table and its locks, the bind hash table, the MIB
 * counters, the ack-vector code, the sysctl entries and the built-in CCIDs.
 *
 * Returns 0 on success.  On failure everything set up so far is released
 * through the chain of labels at the end, the dccp_hashinfo pointers are
 * reset to NULL, and a negative errno is returned.
 */
static int __init dccp_init(void)
{
        unsigned long goal;
        int ehash_order, bhash_order, i;
        int rc;

        /* The per-skb DCCP control block must fit into sk_buff::cb. */
        BUILD_BUG_ON(sizeof(struct dccp_skb_cb) >
                     FIELD_SIZEOF(struct sk_buff, cb));
        rc = percpu_counter_init(&dccp_orphan_count, 0, GFP_KERNEL);
        if (rc)
                goto out_fail;
        /*
         * Error code for the allocation failures below, none of which
         * assigns rc itself.
         */
        rc = -ENOBUFS;
        inet_hashinfo_init(&dccp_hashinfo);
        dccp_hashinfo.bind_bucket_cachep =
                kmem_cache_create("dccp_bind_bucket",
                                  sizeof(struct inet_bind_bucket), 0,
                                  SLAB_HWCACHE_ALIGN, NULL);
        if (!dccp_hashinfo.bind_bucket_cachep)
                goto out_free_percpu;

        /*
         * Size and allocate the main established and bind bucket
         * hash tables.
         *
         * The methodology is similar to that of the buffer cache.
         */
        if (totalram_pages >= (128 * 1024))
                goal = totalram_pages >> (21 - PAGE_SHIFT);
        else
                goal = totalram_pages >> (23 - PAGE_SHIFT);

        /* A non-zero thash_entries replaces the goal (converted to pages). */
        if (thash_entries)
                goal = (thash_entries *
                        sizeof(struct inet_ehash_bucket)) >> PAGE_SHIFT;
        /* Smallest page order whose page count is not below the goal. */
        for (ehash_order = 0; (1UL << ehash_order) < goal; ehash_order++)
                ;
        /*
         * Try the computed order first and fall back to smaller orders
         * while the allocation fails.
         *
         * NOTE(review): the retry condition is "> 0", so order 0 is not
         * attempted after a failure at order 1, whereas the bhash loop
         * below uses ">= 0" -- confirm whether this is intentional.
         */
        do {
                unsigned long hash_size = (1UL << ehash_order) * PAGE_SIZE /
                                        sizeof(struct inet_ehash_bucket);

                /* Round down to a power of two so size - 1 works as a mask. */
                while (hash_size & (hash_size - 1))
                        hash_size--;
                dccp_hashinfo.ehash_mask = hash_size - 1;
                dccp_hashinfo.ehash = (struct inet_ehash_bucket *)
                        __get_free_pages(GFP_ATOMIC|__GFP_NOWARN, ehash_order);
        } while (!dccp_hashinfo.ehash && --ehash_order > 0);

        if (!dccp_hashinfo.ehash) {
                DCCP_CRIT("Failed to allocate DCCP established hash table");
                goto out_free_bind_bucket_cachep;
        }

        /* Each chain head is initialised with its own bucket index. */
        for (i = 0; i <= dccp_hashinfo.ehash_mask; i++)
                INIT_HLIST_NULLS_HEAD(&dccp_hashinfo.ehash[i].chain, i);

        if (inet_ehash_locks_alloc(&dccp_hashinfo))
                        goto out_free_dccp_ehash;

        /* The bind hash starts from the order that worked for ehash. */
        bhash_order = ehash_order;

        do {
                dccp_hashinfo.bhash_size = (1UL << bhash_order) * PAGE_SIZE /
                                        sizeof(struct inet_bind_hashbucket);
                /*
                 * Skip orders that would give more than 64K buckets; the
                 * "continue" jumps to the loop condition, which lowers
                 * the order.
                 */
                if ((dccp_hashinfo.bhash_size > (64 * 1024)) &&
                    bhash_order > 0)
                        continue;
                dccp_hashinfo.bhash = (struct inet_bind_hashbucket *)
                        __get_free_pages(GFP_ATOMIC|__GFP_NOWARN, bhash_order);
        } while (!dccp_hashinfo.bhash && --bhash_order >= 0);

        if (!dccp_hashinfo.bhash) {
                DCCP_CRIT("Failed to allocate DCCP bind hash table");
                goto out_free_dccp_locks;
        }

        for (i = 0; i < dccp_hashinfo.bhash_size; i++) {
                spin_lock_init(&dccp_hashinfo.bhash[i].lock);
                INIT_HLIST_HEAD(&dccp_hashinfo.bhash[i].chain);
        }

        /* From here on rc carries the error of the failing sub-init. */
        rc = dccp_mib_init();
        if (rc)
                goto out_free_dccp_bhash;

        rc = dccp_ackvec_init();
        if (rc)
                goto out_free_dccp_mib;

        rc = dccp_sysctl_init();
        if (rc)
                goto out_ackvec_exit;

        rc = ccid_initialize_builtins();
        if (rc)
                goto out_sysctl_exit;

        dccp_timestamping_init();

        return 0;

        /* Error unwinding: each label undoes one setup step, newest first. */
out_sysctl_exit:
        dccp_sysctl_exit();
out_ackvec_exit:
        dccp_ackvec_exit();
out_free_dccp_mib:
        dccp_mib_exit();
out_free_dccp_bhash:
        free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order);
out_free_dccp_locks:
        inet_ehash_locks_free(&dccp_hashinfo);
out_free_dccp_ehash:
        free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order);
out_free_bind_bucket_cachep:
        kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
out_free_percpu:
        percpu_counter_destroy(&dccp_orphan_count);
out_fail:
        /* Leave no stale pointers behind in the global hashinfo. */
        dccp_hashinfo.bhash = NULL;
        dccp_hashinfo.ehash = NULL;
        dccp_hashinfo.bind_bucket_cachep = NULL;
        return rc;
}
1233
/*
 * dccp_fini - module teardown
 *
 * Releases the resources set up by dccp_init().  The page orders passed to
 * free_pages() are recomputed with get_order() from the sizes stored in
 * dccp_hashinfo (bhash_size, ehash_mask + 1), since the orders used at
 * allocation time were local to dccp_init().
 */
static void __exit dccp_fini(void)
{
        ccid_cleanup_builtins();
        dccp_mib_exit();
        free_pages((unsigned long)dccp_hashinfo.bhash,
                   get_order(dccp_hashinfo.bhash_size *
                             sizeof(struct inet_bind_hashbucket)));
        free_pages((unsigned long)dccp_hashinfo.ehash,
                   get_order((dccp_hashinfo.ehash_mask + 1) *
                             sizeof(struct inet_ehash_bucket)));
        inet_ehash_locks_free(&dccp_hashinfo);
        kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
        dccp_ackvec_exit();
        dccp_sysctl_exit();
        percpu_counter_destroy(&dccp_orphan_count);
}
1250
/* Register dccp_init()/dccp_fini() as the module's entry and exit points. */
module_init(dccp_init);
module_exit(dccp_fini);

/* Module metadata. */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@conectiva.com.br>");
MODULE_DESCRIPTION("DCCP - Datagram Congestion Controlled Protocol");