These changes are the raw update to the linux-4.4.6-rt14 kernel sources.
[kvmfornfv.git] / kernel / drivers / infiniband / core / uverbs_cmd.c
1 /*
2  * Copyright (c) 2005 Topspin Communications.  All rights reserved.
3  * Copyright (c) 2005, 2006, 2007 Cisco Systems.  All rights reserved.
4  * Copyright (c) 2005 PathScale, Inc.  All rights reserved.
5  * Copyright (c) 2006 Mellanox Technologies.  All rights reserved.
6  *
7  * This software is available to you under a choice of one of two
8  * licenses.  You may choose to be licensed under the terms of the GNU
9  * General Public License (GPL) Version 2, available from the file
10  * COPYING in the main directory of this source tree, or the
11  * OpenIB.org BSD license below:
12  *
13  *     Redistribution and use in source and binary forms, with or
14  *     without modification, are permitted provided that the following
15  *     conditions are met:
16  *
17  *      - Redistributions of source code must retain the above
18  *        copyright notice, this list of conditions and the following
19  *        disclaimer.
20  *
21  *      - Redistributions in binary form must reproduce the above
22  *        copyright notice, this list of conditions and the following
23  *        disclaimer in the documentation and/or other materials
24  *        provided with the distribution.
25  *
26  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
27  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
28  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
29  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
30  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
31  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
32  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
33  * SOFTWARE.
34  */
35
36 #include <linux/file.h>
37 #include <linux/fs.h>
38 #include <linux/slab.h>
39 #include <linux/sched.h>
40
41 #include <asm/uaccess.h>
42
43 #include "uverbs.h"
44 #include "core_priv.h"
45
46 struct uverbs_lock_class {
47         struct lock_class_key   key;
48         char                    name[16];
49 };
50
51 static struct uverbs_lock_class pd_lock_class   = { .name = "PD-uobj" };
52 static struct uverbs_lock_class mr_lock_class   = { .name = "MR-uobj" };
53 static struct uverbs_lock_class mw_lock_class   = { .name = "MW-uobj" };
54 static struct uverbs_lock_class cq_lock_class   = { .name = "CQ-uobj" };
55 static struct uverbs_lock_class qp_lock_class   = { .name = "QP-uobj" };
56 static struct uverbs_lock_class ah_lock_class   = { .name = "AH-uobj" };
57 static struct uverbs_lock_class srq_lock_class  = { .name = "SRQ-uobj" };
58 static struct uverbs_lock_class xrcd_lock_class = { .name = "XRCD-uobj" };
59 static struct uverbs_lock_class rule_lock_class = { .name = "RULE-uobj" };
60
61 /*
62  * The ib_uobject locking scheme is as follows:
63  *
64  * - ib_uverbs_idr_lock protects the uverbs idrs themselves, so it
65  *   needs to be held during all idr write operations.  When an object is
66  *   looked up, a reference must be taken on the object's kref before
67  *   dropping this lock.  For read operations, rcu_read_lock() is
68  *   sufficient, but the kref reference must likewise be taken
69  *   before the rcu_read_unlock().
70  *
71  * - Each object also has an rwsem.  This rwsem must be held for
72  *   reading while an operation that uses the object is performed.
73  *   For example, while registering an MR, the associated PD's
74  *   uobject.mutex must be held for reading.  The rwsem must be held
75  *   for writing while initializing or destroying an object.
76  *
77  * - In addition, each object has a "live" flag.  If this flag is not
78  *   set, then lookups of the object will fail even if it is found in
79  *   the idr.  This handles a reader that blocks and does not acquire
80  *   the rwsem until after the object is destroyed.  The destroy
81  *   operation will set the live flag to 0 and then drop the rwsem;
82  *   this will allow the reader to acquire the rwsem, see that the
83  *   live flag is 0, and then drop the rwsem and its reference to the
84  *   object.  The underlying storage will not be freed until the last
85  *   reference to the object is dropped.
86  */
87
88 static void init_uobj(struct ib_uobject *uobj, u64 user_handle,
89                       struct ib_ucontext *context, struct uverbs_lock_class *c)
90 {
91         uobj->user_handle = user_handle;
92         uobj->context     = context;
93         kref_init(&uobj->ref);
94         init_rwsem(&uobj->mutex);
95         lockdep_set_class_and_name(&uobj->mutex, &c->key, c->name);
96         uobj->live        = 0;
97 }
98
99 static void release_uobj(struct kref *kref)
100 {
101         kfree_rcu(container_of(kref, struct ib_uobject, ref), rcu);
102 }
103
104 static void put_uobj(struct ib_uobject *uobj)
105 {
106         kref_put(&uobj->ref, release_uobj);
107 }
108
109 static void put_uobj_read(struct ib_uobject *uobj)
110 {
111         up_read(&uobj->mutex);
112         put_uobj(uobj);
113 }
114
115 static void put_uobj_write(struct ib_uobject *uobj)
116 {
117         up_write(&uobj->mutex);
118         put_uobj(uobj);
119 }
120
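/*
 * Insert @uobj into @idr and record the allocated id.  idr_preload() with
 * GFP_KERNEL pre-allocates memory so that idr_alloc() can be called with
 * GFP_NOWAIT while ib_uverbs_idr_lock is held.
 */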
121 static int idr_add_uobj(struct idr *idr, struct ib_uobject *uobj)
122 {
123         int ret;
124
125         idr_preload(GFP_KERNEL);
126         spin_lock(&ib_uverbs_idr_lock);
127
128         ret = idr_alloc(idr, uobj, 0, 0, GFP_NOWAIT);
129         if (ret >= 0)
130                 uobj->id = ret;
131
132         spin_unlock(&ib_uverbs_idr_lock);
133         idr_preload_end();
134
135         return ret < 0 ? ret : 0;
136 }
137
138 void idr_remove_uobj(struct idr *idr, struct ib_uobject *uobj)
139 {
140         spin_lock(&ib_uverbs_idr_lock);
141         idr_remove(idr, uobj->id);
142         spin_unlock(&ib_uverbs_idr_lock);
143 }
144
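/*
 * Look up @id in @idr under rcu_read_lock() and, if the object belongs to
 * @context, take a reference on its kref before returning it.
 */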
145 static struct ib_uobject *__idr_get_uobj(struct idr *idr, int id,
146                                          struct ib_ucontext *context)
147 {
148         struct ib_uobject *uobj;
149
150         rcu_read_lock();
151         uobj = idr_find(idr, id);
152         if (uobj) {
153                 if (uobj->context == context)
154                         kref_get(&uobj->ref);
155                 else
156                         uobj = NULL;
157         }
158         rcu_read_unlock();
159
160         return uobj;
161 }
162
163 static struct ib_uobject *idr_read_uobj(struct idr *idr, int id,
164                                         struct ib_ucontext *context, int nested)
165 {
166         struct ib_uobject *uobj;
167
168         uobj = __idr_get_uobj(idr, id, context);
169         if (!uobj)
170                 return NULL;
171
172         if (nested)
173                 down_read_nested(&uobj->mutex, SINGLE_DEPTH_NESTING);
174         else
175                 down_read(&uobj->mutex);
176         if (!uobj->live) {
177                 put_uobj_read(uobj);
178                 return NULL;
179         }
180
181         return uobj;
182 }
183
184 static struct ib_uobject *idr_write_uobj(struct idr *idr, int id,
185                                          struct ib_ucontext *context)
186 {
187         struct ib_uobject *uobj;
188
189         uobj = __idr_get_uobj(idr, id, context);
190         if (!uobj)
191                 return NULL;
192
193         down_write(&uobj->mutex);
194         if (!uobj->live) {
195                 put_uobj_write(uobj);
196                 return NULL;
197         }
198
199         return uobj;
200 }
201
202 static void *idr_read_obj(struct idr *idr, int id, struct ib_ucontext *context,
203                           int nested)
204 {
205         struct ib_uobject *uobj;
206
207         uobj = idr_read_uobj(idr, id, context, nested);
208         return uobj ? uobj->object : NULL;
209 }
210
211 static struct ib_pd *idr_read_pd(int pd_handle, struct ib_ucontext *context)
212 {
213         return idr_read_obj(&ib_uverbs_pd_idr, pd_handle, context, 0);
214 }
215
216 static void put_pd_read(struct ib_pd *pd)
217 {
218         put_uobj_read(pd->uobject);
219 }
220
221 static struct ib_cq *idr_read_cq(int cq_handle, struct ib_ucontext *context, int nested)
222 {
223         return idr_read_obj(&ib_uverbs_cq_idr, cq_handle, context, nested);
224 }
225
226 static void put_cq_read(struct ib_cq *cq)
227 {
228         put_uobj_read(cq->uobject);
229 }
230
231 static struct ib_ah *idr_read_ah(int ah_handle, struct ib_ucontext *context)
232 {
233         return idr_read_obj(&ib_uverbs_ah_idr, ah_handle, context, 0);
234 }
235
236 static void put_ah_read(struct ib_ah *ah)
237 {
238         put_uobj_read(ah->uobject);
239 }
240
241 static struct ib_qp *idr_read_qp(int qp_handle, struct ib_ucontext *context)
242 {
243         return idr_read_obj(&ib_uverbs_qp_idr, qp_handle, context, 0);
244 }
245
246 static struct ib_qp *idr_write_qp(int qp_handle, struct ib_ucontext *context)
247 {
248         struct ib_uobject *uobj;
249
250         uobj = idr_write_uobj(&ib_uverbs_qp_idr, qp_handle, context);
251         return uobj ? uobj->object : NULL;
252 }
253
254 static void put_qp_read(struct ib_qp *qp)
255 {
256         put_uobj_read(qp->uobject);
257 }
258
259 static void put_qp_write(struct ib_qp *qp)
260 {
261         put_uobj_write(qp->uobject);
262 }
263
264 static struct ib_srq *idr_read_srq(int srq_handle, struct ib_ucontext *context)
265 {
266         return idr_read_obj(&ib_uverbs_srq_idr, srq_handle, context, 0);
267 }
268
269 static void put_srq_read(struct ib_srq *srq)
270 {
271         put_uobj_read(srq->uobject);
272 }
273
274 static struct ib_xrcd *idr_read_xrcd(int xrcd_handle, struct ib_ucontext *context,
275                                      struct ib_uobject **uobj)
276 {
277         *uobj = idr_read_uobj(&ib_uverbs_xrcd_idr, xrcd_handle, context, 0);
278         return *uobj ? (*uobj)->object : NULL;
279 }
280
281 static void put_xrcd_read(struct ib_uobject *uobj)
282 {
283         put_uobj_read(uobj);
284 }
285
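/*
 * A minimal sketch of the ib_uobject scheme described at the top of this
 * file, using the QP lookup helpers above.  It is illustrative only: the
 * function name is made up, and the handle and context would normally
 * come from a user-space command, as in the handlers below.
 */
static inline int __maybe_unused uverbs_example_use_qp(int qp_handle,
                                                       struct ib_ucontext *context)
{
        struct ib_qp *qp;

        /* Takes a kref and the uobject's rwsem for reading; NULL if not live. */
        qp = idr_read_qp(qp_handle, context);
        if (!qp)
                return -EINVAL;

        /* ... operate on qp while the read lock pins the object ... */

        /* Drop the rwsem and the kref taken by the lookup. */
        put_qp_read(qp);
        return 0;
}
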
286 ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
287                               struct ib_device *ib_dev,
288                               const char __user *buf,
289                               int in_len, int out_len)
290 {
291         struct ib_uverbs_get_context      cmd;
292         struct ib_uverbs_get_context_resp resp;
293         struct ib_udata                   udata;
294 #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
295         struct ib_device_attr             dev_attr;
296 #endif
297         struct ib_ucontext               *ucontext;
298         struct file                      *filp;
299         int ret;
300
301         if (out_len < sizeof resp)
302                 return -ENOSPC;
303
304         if (copy_from_user(&cmd, buf, sizeof cmd))
305                 return -EFAULT;
306
307         mutex_lock(&file->mutex);
308
309         if (file->ucontext) {
310                 ret = -EINVAL;
311                 goto err;
312         }
313
314         INIT_UDATA(&udata, buf + sizeof cmd,
315                    (unsigned long) cmd.response + sizeof resp,
316                    in_len - sizeof cmd, out_len - sizeof resp);
317
318         ucontext = ib_dev->alloc_ucontext(ib_dev, &udata);
319         if (IS_ERR(ucontext)) {
320                 ret = PTR_ERR(ucontext);
321                 goto err;
322         }
323
324         ucontext->device = ib_dev;
325         INIT_LIST_HEAD(&ucontext->pd_list);
326         INIT_LIST_HEAD(&ucontext->mr_list);
327         INIT_LIST_HEAD(&ucontext->mw_list);
328         INIT_LIST_HEAD(&ucontext->cq_list);
329         INIT_LIST_HEAD(&ucontext->qp_list);
330         INIT_LIST_HEAD(&ucontext->srq_list);
331         INIT_LIST_HEAD(&ucontext->ah_list);
332         INIT_LIST_HEAD(&ucontext->xrcd_list);
333         INIT_LIST_HEAD(&ucontext->rule_list);
334         rcu_read_lock();
335         ucontext->tgid = get_task_pid(current->group_leader, PIDTYPE_PID);
336         rcu_read_unlock();
337         ucontext->closing = 0;
338
339 #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
340         ucontext->umem_tree = RB_ROOT;
341         init_rwsem(&ucontext->umem_rwsem);
342         ucontext->odp_mrs_count = 0;
343         INIT_LIST_HEAD(&ucontext->no_private_counters);
344
345         ret = ib_query_device(ib_dev, &dev_attr);
346         if (ret)
347                 goto err_free;
348         if (!(dev_attr.device_cap_flags & IB_DEVICE_ON_DEMAND_PAGING))
349                 ucontext->invalidate_range = NULL;
350
351 #endif
352
353         resp.num_comp_vectors = file->device->num_comp_vectors;
354
355         ret = get_unused_fd_flags(O_CLOEXEC);
356         if (ret < 0)
357                 goto err_free;
358         resp.async_fd = ret;
359
360         filp = ib_uverbs_alloc_event_file(file, ib_dev, 1);
361         if (IS_ERR(filp)) {
362                 ret = PTR_ERR(filp);
363                 goto err_fd;
364         }
365
366         if (copy_to_user((void __user *) (unsigned long) cmd.response,
367                          &resp, sizeof resp)) {
368                 ret = -EFAULT;
369                 goto err_file;
370         }
371
372         file->ucontext = ucontext;
373
374         fd_install(resp.async_fd, filp);
375
376         mutex_unlock(&file->mutex);
377
378         return in_len;
379
380 err_file:
381         ib_uverbs_free_async_event_file(file);
382         fput(filp);
383
384 err_fd:
385         put_unused_fd(resp.async_fd);
386
387 err_free:
388         put_pid(ucontext->tgid);
389         ib_dev->dealloc_ucontext(ucontext);
390
391 err:
392         mutex_unlock(&file->mutex);
393         return ret;
394 }
395
396 static void copy_query_dev_fields(struct ib_uverbs_file *file,
397                                   struct ib_device *ib_dev,
398                                   struct ib_uverbs_query_device_resp *resp,
399                                   struct ib_device_attr *attr)
400 {
401         resp->fw_ver            = attr->fw_ver;
402         resp->node_guid         = ib_dev->node_guid;
403         resp->sys_image_guid    = attr->sys_image_guid;
404         resp->max_mr_size       = attr->max_mr_size;
405         resp->page_size_cap     = attr->page_size_cap;
406         resp->vendor_id         = attr->vendor_id;
407         resp->vendor_part_id    = attr->vendor_part_id;
408         resp->hw_ver            = attr->hw_ver;
409         resp->max_qp            = attr->max_qp;
410         resp->max_qp_wr         = attr->max_qp_wr;
411         resp->device_cap_flags  = attr->device_cap_flags;
412         resp->max_sge           = attr->max_sge;
413         resp->max_sge_rd        = attr->max_sge_rd;
414         resp->max_cq            = attr->max_cq;
415         resp->max_cqe           = attr->max_cqe;
416         resp->max_mr            = attr->max_mr;
417         resp->max_pd            = attr->max_pd;
418         resp->max_qp_rd_atom    = attr->max_qp_rd_atom;
419         resp->max_ee_rd_atom    = attr->max_ee_rd_atom;
420         resp->max_res_rd_atom   = attr->max_res_rd_atom;
421         resp->max_qp_init_rd_atom       = attr->max_qp_init_rd_atom;
422         resp->max_ee_init_rd_atom       = attr->max_ee_init_rd_atom;
423         resp->atomic_cap                = attr->atomic_cap;
424         resp->max_ee                    = attr->max_ee;
425         resp->max_rdd                   = attr->max_rdd;
426         resp->max_mw                    = attr->max_mw;
427         resp->max_raw_ipv6_qp           = attr->max_raw_ipv6_qp;
428         resp->max_raw_ethy_qp           = attr->max_raw_ethy_qp;
429         resp->max_mcast_grp             = attr->max_mcast_grp;
430         resp->max_mcast_qp_attach       = attr->max_mcast_qp_attach;
431         resp->max_total_mcast_qp_attach = attr->max_total_mcast_qp_attach;
432         resp->max_ah                    = attr->max_ah;
433         resp->max_fmr                   = attr->max_fmr;
434         resp->max_map_per_fmr           = attr->max_map_per_fmr;
435         resp->max_srq                   = attr->max_srq;
436         resp->max_srq_wr                = attr->max_srq_wr;
437         resp->max_srq_sge               = attr->max_srq_sge;
438         resp->max_pkeys                 = attr->max_pkeys;
439         resp->local_ca_ack_delay        = attr->local_ca_ack_delay;
440         resp->phys_port_cnt             = ib_dev->phys_port_cnt;
441 }
442
443 ssize_t ib_uverbs_query_device(struct ib_uverbs_file *file,
444                                struct ib_device *ib_dev,
445                                const char __user *buf,
446                                int in_len, int out_len)
447 {
448         struct ib_uverbs_query_device      cmd;
449         struct ib_uverbs_query_device_resp resp;
450         struct ib_device_attr              attr;
451         int                                ret;
452
453         if (out_len < sizeof resp)
454                 return -ENOSPC;
455
456         if (copy_from_user(&cmd, buf, sizeof cmd))
457                 return -EFAULT;
458
459         ret = ib_query_device(ib_dev, &attr);
460         if (ret)
461                 return ret;
462
463         memset(&resp, 0, sizeof resp);
464         copy_query_dev_fields(file, ib_dev, &resp, &attr);
465
466         if (copy_to_user((void __user *) (unsigned long) cmd.response,
467                          &resp, sizeof resp))
468                 return -EFAULT;
469
470         return in_len;
471 }
472
473 ssize_t ib_uverbs_query_port(struct ib_uverbs_file *file,
474                              struct ib_device *ib_dev,
475                              const char __user *buf,
476                              int in_len, int out_len)
477 {
478         struct ib_uverbs_query_port      cmd;
479         struct ib_uverbs_query_port_resp resp;
480         struct ib_port_attr              attr;
481         int                              ret;
482
483         if (out_len < sizeof resp)
484                 return -ENOSPC;
485
486         if (copy_from_user(&cmd, buf, sizeof cmd))
487                 return -EFAULT;
488
489         ret = ib_query_port(ib_dev, cmd.port_num, &attr);
490         if (ret)
491                 return ret;
492
493         memset(&resp, 0, sizeof resp);
494
495         resp.state           = attr.state;
496         resp.max_mtu         = attr.max_mtu;
497         resp.active_mtu      = attr.active_mtu;
498         resp.gid_tbl_len     = attr.gid_tbl_len;
499         resp.port_cap_flags  = attr.port_cap_flags;
500         resp.max_msg_sz      = attr.max_msg_sz;
501         resp.bad_pkey_cntr   = attr.bad_pkey_cntr;
502         resp.qkey_viol_cntr  = attr.qkey_viol_cntr;
503         resp.pkey_tbl_len    = attr.pkey_tbl_len;
504         resp.lid             = attr.lid;
505         resp.sm_lid          = attr.sm_lid;
506         resp.lmc             = attr.lmc;
507         resp.max_vl_num      = attr.max_vl_num;
508         resp.sm_sl           = attr.sm_sl;
509         resp.subnet_timeout  = attr.subnet_timeout;
510         resp.init_type_reply = attr.init_type_reply;
511         resp.active_width    = attr.active_width;
512         resp.active_speed    = attr.active_speed;
513         resp.phys_state      = attr.phys_state;
514         resp.link_layer      = rdma_port_get_link_layer(ib_dev,
515                                                         cmd.port_num);
516
517         if (copy_to_user((void __user *) (unsigned long) cmd.response,
518                          &resp, sizeof resp))
519                 return -EFAULT;
520
521         return in_len;
522 }
523
524 ssize_t ib_uverbs_alloc_pd(struct ib_uverbs_file *file,
525                            struct ib_device *ib_dev,
526                            const char __user *buf,
527                            int in_len, int out_len)
528 {
529         struct ib_uverbs_alloc_pd      cmd;
530         struct ib_uverbs_alloc_pd_resp resp;
531         struct ib_udata                udata;
532         struct ib_uobject             *uobj;
533         struct ib_pd                  *pd;
534         int                            ret;
535
536         if (out_len < sizeof resp)
537                 return -ENOSPC;
538
539         if (copy_from_user(&cmd, buf, sizeof cmd))
540                 return -EFAULT;
541
542         INIT_UDATA(&udata, buf + sizeof cmd,
543                    (unsigned long) cmd.response + sizeof resp,
544                    in_len - sizeof cmd, out_len - sizeof resp);
545
546         uobj = kmalloc(sizeof *uobj, GFP_KERNEL);
547         if (!uobj)
548                 return -ENOMEM;
549
550         init_uobj(uobj, 0, file->ucontext, &pd_lock_class);
551         down_write(&uobj->mutex);
552
553         pd = ib_dev->alloc_pd(ib_dev, file->ucontext, &udata);
554         if (IS_ERR(pd)) {
555                 ret = PTR_ERR(pd);
556                 goto err;
557         }
558
559         pd->device  = ib_dev;
560         pd->uobject = uobj;
561         pd->local_mr = NULL;
562         atomic_set(&pd->usecnt, 0);
563
564         uobj->object = pd;
565         ret = idr_add_uobj(&ib_uverbs_pd_idr, uobj);
566         if (ret)
567                 goto err_idr;
568
569         memset(&resp, 0, sizeof resp);
570         resp.pd_handle = uobj->id;
571
572         if (copy_to_user((void __user *) (unsigned long) cmd.response,
573                          &resp, sizeof resp)) {
574                 ret = -EFAULT;
575                 goto err_copy;
576         }
577
578         mutex_lock(&file->mutex);
579         list_add_tail(&uobj->list, &file->ucontext->pd_list);
580         mutex_unlock(&file->mutex);
581
582         uobj->live = 1;
583
584         up_write(&uobj->mutex);
585
586         return in_len;
587
588 err_copy:
589         idr_remove_uobj(&ib_uverbs_pd_idr, uobj);
590
591 err_idr:
592         ib_dealloc_pd(pd);
593
594 err:
595         put_uobj_write(uobj);
596         return ret;
597 }
598
599 ssize_t ib_uverbs_dealloc_pd(struct ib_uverbs_file *file,
600                              struct ib_device *ib_dev,
601                              const char __user *buf,
602                              int in_len, int out_len)
603 {
604         struct ib_uverbs_dealloc_pd cmd;
605         struct ib_uobject          *uobj;
606         struct ib_pd               *pd;
607         int                         ret;
608
609         if (copy_from_user(&cmd, buf, sizeof cmd))
610                 return -EFAULT;
611
612         uobj = idr_write_uobj(&ib_uverbs_pd_idr, cmd.pd_handle, file->ucontext);
613         if (!uobj)
614                 return -EINVAL;
615         pd = uobj->object;
616
617         if (atomic_read(&pd->usecnt)) {
618                 ret = -EBUSY;
619                 goto err_put;
620         }
621
622         ret = pd->device->dealloc_pd(uobj->object);
623         WARN_ONCE(ret, "Infiniband HW driver failed dealloc_pd");
624         if (ret)
625                 goto err_put;
626
627         uobj->live = 0;
628         put_uobj_write(uobj);
629
630         idr_remove_uobj(&ib_uverbs_pd_idr, uobj);
631
632         mutex_lock(&file->mutex);
633         list_del(&uobj->list);
634         mutex_unlock(&file->mutex);
635
636         put_uobj(uobj);
637
638         return in_len;
639
640 err_put:
641         put_uobj_write(uobj);
642         return ret;
643 }
644
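/*
 * Each uverbs device keeps an rbtree of xrcd_table_entry nodes keyed by
 * inode, so that everyone opening an XRCD through the same file shares a
 * single ib_xrcd.  The tree is manipulated under xrcd_tree_mutex.
 */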
645 struct xrcd_table_entry {
646         struct rb_node  node;
647         struct ib_xrcd *xrcd;
648         struct inode   *inode;
649 };
650
651 static int xrcd_table_insert(struct ib_uverbs_device *dev,
652                             struct inode *inode,
653                             struct ib_xrcd *xrcd)
654 {
655         struct xrcd_table_entry *entry, *scan;
656         struct rb_node **p = &dev->xrcd_tree.rb_node;
657         struct rb_node *parent = NULL;
658
659         entry = kmalloc(sizeof *entry, GFP_KERNEL);
660         if (!entry)
661                 return -ENOMEM;
662
663         entry->xrcd  = xrcd;
664         entry->inode = inode;
665
666         while (*p) {
667                 parent = *p;
668                 scan = rb_entry(parent, struct xrcd_table_entry, node);
669
670                 if (inode < scan->inode) {
671                         p = &(*p)->rb_left;
672                 } else if (inode > scan->inode) {
673                         p = &(*p)->rb_right;
674                 } else {
675                         kfree(entry);
676                         return -EEXIST;
677                 }
678         }
679
680         rb_link_node(&entry->node, parent, p);
681         rb_insert_color(&entry->node, &dev->xrcd_tree);
682         igrab(inode);
683         return 0;
684 }
685
686 static struct xrcd_table_entry *xrcd_table_search(struct ib_uverbs_device *dev,
687                                                   struct inode *inode)
688 {
689         struct xrcd_table_entry *entry;
690         struct rb_node *p = dev->xrcd_tree.rb_node;
691
692         while (p) {
693                 entry = rb_entry(p, struct xrcd_table_entry, node);
694
695                 if (inode < entry->inode)
696                         p = p->rb_left;
697                 else if (inode > entry->inode)
698                         p = p->rb_right;
699                 else
700                         return entry;
701         }
702
703         return NULL;
704 }
705
706 static struct ib_xrcd *find_xrcd(struct ib_uverbs_device *dev, struct inode *inode)
707 {
708         struct xrcd_table_entry *entry;
709
710         entry = xrcd_table_search(dev, inode);
711         if (!entry)
712                 return NULL;
713
714         return entry->xrcd;
715 }
716
717 static void xrcd_table_delete(struct ib_uverbs_device *dev,
718                               struct inode *inode)
719 {
720         struct xrcd_table_entry *entry;
721
722         entry = xrcd_table_search(dev, inode);
723         if (entry) {
724                 iput(inode);
725                 rb_erase(&entry->node, &dev->xrcd_tree);
726                 kfree(entry);
727         }
728 }
729
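/*
 * OPEN_XRCD: when the command carries a file descriptor, reuse the XRCD
 * already bound to that file's inode, or create one if O_CREAT is set;
 * with fd == -1 an anonymous XRCD is allocated for this context.
 */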
730 ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file,
731                             struct ib_device *ib_dev,
732                             const char __user *buf, int in_len,
733                             int out_len)
734 {
735         struct ib_uverbs_open_xrcd      cmd;
736         struct ib_uverbs_open_xrcd_resp resp;
737         struct ib_udata                 udata;
738         struct ib_uxrcd_object         *obj;
739         struct ib_xrcd                 *xrcd = NULL;
740         struct fd                       f = {NULL, 0};
741         struct inode                   *inode = NULL;
742         int                             ret = 0;
743         int                             new_xrcd = 0;
744
745         if (out_len < sizeof resp)
746                 return -ENOSPC;
747
748         if (copy_from_user(&cmd, buf, sizeof cmd))
749                 return -EFAULT;
750
751         INIT_UDATA(&udata, buf + sizeof cmd,
752                    (unsigned long) cmd.response + sizeof resp,
753                    in_len - sizeof cmd, out_len - sizeof  resp);
754
755         mutex_lock(&file->device->xrcd_tree_mutex);
756
757         if (cmd.fd != -1) {
758                 /* search for file descriptor */
759                 f = fdget(cmd.fd);
760                 if (!f.file) {
761                         ret = -EBADF;
762                         goto err_tree_mutex_unlock;
763                 }
764
765                 inode = file_inode(f.file);
766                 xrcd = find_xrcd(file->device, inode);
767                 if (!xrcd && !(cmd.oflags & O_CREAT)) {
768                         /* no XRCD exists yet for this inode; O_CREAT is required to create one */
769                         ret = -EAGAIN;
770                         goto err_tree_mutex_unlock;
771                 }
772
773                 if (xrcd && cmd.oflags & O_EXCL) {
774                         ret = -EINVAL;
775                         goto err_tree_mutex_unlock;
776                 }
777         }
778
779         obj = kmalloc(sizeof *obj, GFP_KERNEL);
780         if (!obj) {
781                 ret = -ENOMEM;
782                 goto err_tree_mutex_unlock;
783         }
784
785         init_uobj(&obj->uobject, 0, file->ucontext, &xrcd_lock_class);
786
787         down_write(&obj->uobject.mutex);
788
789         if (!xrcd) {
790                 xrcd = ib_dev->alloc_xrcd(ib_dev, file->ucontext, &udata);
791                 if (IS_ERR(xrcd)) {
792                         ret = PTR_ERR(xrcd);
793                         goto err;
794                 }
795
796                 xrcd->inode   = inode;
797                 xrcd->device  = ib_dev;
798                 atomic_set(&xrcd->usecnt, 0);
799                 mutex_init(&xrcd->tgt_qp_mutex);
800                 INIT_LIST_HEAD(&xrcd->tgt_qp_list);
801                 new_xrcd = 1;
802         }
803
804         atomic_set(&obj->refcnt, 0);
805         obj->uobject.object = xrcd;
806         ret = idr_add_uobj(&ib_uverbs_xrcd_idr, &obj->uobject);
807         if (ret)
808                 goto err_idr;
809
810         memset(&resp, 0, sizeof resp);
811         resp.xrcd_handle = obj->uobject.id;
812
813         if (inode) {
814                 if (new_xrcd) {
815                         /* create new inode/xrcd table entry */
816                         ret = xrcd_table_insert(file->device, inode, xrcd);
817                         if (ret)
818                                 goto err_insert_xrcd;
819                 }
820                 atomic_inc(&xrcd->usecnt);
821         }
822
823         if (copy_to_user((void __user *) (unsigned long) cmd.response,
824                          &resp, sizeof resp)) {
825                 ret = -EFAULT;
826                 goto err_copy;
827         }
828
829         if (f.file)
830                 fdput(f);
831
832         mutex_lock(&file->mutex);
833         list_add_tail(&obj->uobject.list, &file->ucontext->xrcd_list);
834         mutex_unlock(&file->mutex);
835
836         obj->uobject.live = 1;
837         up_write(&obj->uobject.mutex);
838
839         mutex_unlock(&file->device->xrcd_tree_mutex);
840         return in_len;
841
842 err_copy:
843         if (inode) {
844                 if (new_xrcd)
845                         xrcd_table_delete(file->device, inode);
846                 atomic_dec(&xrcd->usecnt);
847         }
848
849 err_insert_xrcd:
850         idr_remove_uobj(&ib_uverbs_xrcd_idr, &obj->uobject);
851
852 err_idr:
853         ib_dealloc_xrcd(xrcd);
854
855 err:
856         put_uobj_write(&obj->uobject);
857
858 err_tree_mutex_unlock:
859         if (f.file)
860                 fdput(f);
861
862         mutex_unlock(&file->device->xrcd_tree_mutex);
863
864         return ret;
865 }
866
867 ssize_t ib_uverbs_close_xrcd(struct ib_uverbs_file *file,
868                              struct ib_device *ib_dev,
869                              const char __user *buf, int in_len,
870                              int out_len)
871 {
872         struct ib_uverbs_close_xrcd cmd;
873         struct ib_uobject           *uobj;
874         struct ib_xrcd              *xrcd = NULL;
875         struct inode                *inode = NULL;
876         struct ib_uxrcd_object      *obj;
877         int                         live;
878         int                         ret = 0;
879
880         if (copy_from_user(&cmd, buf, sizeof cmd))
881                 return -EFAULT;
882
883         mutex_lock(&file->device->xrcd_tree_mutex);
884         uobj = idr_write_uobj(&ib_uverbs_xrcd_idr, cmd.xrcd_handle, file->ucontext);
885         if (!uobj) {
886                 ret = -EINVAL;
887                 goto out;
888         }
889
890         xrcd  = uobj->object;
891         inode = xrcd->inode;
892         obj   = container_of(uobj, struct ib_uxrcd_object, uobject);
893         if (atomic_read(&obj->refcnt)) {
894                 put_uobj_write(uobj);
895                 ret = -EBUSY;
896                 goto out;
897         }
898
899         if (!inode || atomic_dec_and_test(&xrcd->usecnt)) {
900                 ret = ib_dealloc_xrcd(uobj->object);
901                 if (!ret)
902                         uobj->live = 0;
903         }
904
905         live = uobj->live;
906         if (inode && ret)
907                 atomic_inc(&xrcd->usecnt);
908
909         put_uobj_write(uobj);
910
911         if (ret)
912                 goto out;
913
914         if (inode && !live)
915                 xrcd_table_delete(file->device, inode);
916
917         idr_remove_uobj(&ib_uverbs_xrcd_idr, uobj);
918         mutex_lock(&file->mutex);
919         list_del(&uobj->list);
920         mutex_unlock(&file->mutex);
921
922         put_uobj(uobj);
923         ret = in_len;
924
925 out:
926         mutex_unlock(&file->device->xrcd_tree_mutex);
927         return ret;
928 }
929
930 void ib_uverbs_dealloc_xrcd(struct ib_uverbs_device *dev,
931                             struct ib_xrcd *xrcd)
932 {
933         struct inode *inode;
934
935         inode = xrcd->inode;
936         if (inode && !atomic_dec_and_test(&xrcd->usecnt))
937                 return;
938
939         ib_dealloc_xrcd(xrcd);
940
941         if (inode)
942                 xrcd_table_delete(dev, inode);
943 }
944
945 ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
946                          struct ib_device *ib_dev,
947                          const char __user *buf, int in_len,
948                          int out_len)
949 {
950         struct ib_uverbs_reg_mr      cmd;
951         struct ib_uverbs_reg_mr_resp resp;
952         struct ib_udata              udata;
953         struct ib_uobject           *uobj;
954         struct ib_pd                *pd;
955         struct ib_mr                *mr;
956         int                          ret;
957
958         if (out_len < sizeof resp)
959                 return -ENOSPC;
960
961         if (copy_from_user(&cmd, buf, sizeof cmd))
962                 return -EFAULT;
963
964         INIT_UDATA(&udata, buf + sizeof cmd,
965                    (unsigned long) cmd.response + sizeof resp,
966                    in_len - sizeof cmd, out_len - sizeof resp);
967
968         if ((cmd.start & ~PAGE_MASK) != (cmd.hca_va & ~PAGE_MASK))
969                 return -EINVAL;
970
971         ret = ib_check_mr_access(cmd.access_flags);
972         if (ret)
973                 return ret;
974
975         uobj = kmalloc(sizeof *uobj, GFP_KERNEL);
976         if (!uobj)
977                 return -ENOMEM;
978
979         init_uobj(uobj, 0, file->ucontext, &mr_lock_class);
980         down_write(&uobj->mutex);
981
982         pd = idr_read_pd(cmd.pd_handle, file->ucontext);
983         if (!pd) {
984                 ret = -EINVAL;
985                 goto err_free;
986         }
987
988         if (cmd.access_flags & IB_ACCESS_ON_DEMAND) {
989                 struct ib_device_attr attr;
990
991                 ret = ib_query_device(pd->device, &attr);
992                 if (ret || !(attr.device_cap_flags &
993                                 IB_DEVICE_ON_DEMAND_PAGING)) {
994                         pr_debug("ODP support not available\n");
995                         ret = -EINVAL;
996                         goto err_put;
997                 }
998         }
999
1000         mr = pd->device->reg_user_mr(pd, cmd.start, cmd.length, cmd.hca_va,
1001                                      cmd.access_flags, &udata);
1002         if (IS_ERR(mr)) {
1003                 ret = PTR_ERR(mr);
1004                 goto err_put;
1005         }
1006
1007         mr->device  = pd->device;
1008         mr->pd      = pd;
1009         mr->uobject = uobj;
1010         atomic_inc(&pd->usecnt);
1011         atomic_set(&mr->usecnt, 0);
1012
1013         uobj->object = mr;
1014         ret = idr_add_uobj(&ib_uverbs_mr_idr, uobj);
1015         if (ret)
1016                 goto err_unreg;
1017
1018         memset(&resp, 0, sizeof resp);
1019         resp.lkey      = mr->lkey;
1020         resp.rkey      = mr->rkey;
1021         resp.mr_handle = uobj->id;
1022
1023         if (copy_to_user((void __user *) (unsigned long) cmd.response,
1024                          &resp, sizeof resp)) {
1025                 ret = -EFAULT;
1026                 goto err_copy;
1027         }
1028
1029         put_pd_read(pd);
1030
1031         mutex_lock(&file->mutex);
1032         list_add_tail(&uobj->list, &file->ucontext->mr_list);
1033         mutex_unlock(&file->mutex);
1034
1035         uobj->live = 1;
1036
1037         up_write(&uobj->mutex);
1038
1039         return in_len;
1040
1041 err_copy:
1042         idr_remove_uobj(&ib_uverbs_mr_idr, uobj);
1043
1044 err_unreg:
1045         ib_dereg_mr(mr);
1046
1047 err_put:
1048         put_pd_read(pd);
1049
1050 err_free:
1051         put_uobj_write(uobj);
1052         return ret;
1053 }
1054
1055 ssize_t ib_uverbs_rereg_mr(struct ib_uverbs_file *file,
1056                            struct ib_device *ib_dev,
1057                            const char __user *buf, int in_len,
1058                            int out_len)
1059 {
1060         struct ib_uverbs_rereg_mr      cmd;
1061         struct ib_uverbs_rereg_mr_resp resp;
1062         struct ib_udata              udata;
1063         struct ib_pd                *pd = NULL;
1064         struct ib_mr                *mr;
1065         struct ib_pd                *old_pd;
1066         int                          ret;
1067         struct ib_uobject           *uobj;
1068
1069         if (out_len < sizeof(resp))
1070                 return -ENOSPC;
1071
1072         if (copy_from_user(&cmd, buf, sizeof(cmd)))
1073                 return -EFAULT;
1074
1075         INIT_UDATA(&udata, buf + sizeof(cmd),
1076                    (unsigned long) cmd.response + sizeof(resp),
1077                    in_len - sizeof(cmd), out_len - sizeof(resp));
1078
1079         if (cmd.flags & ~IB_MR_REREG_SUPPORTED || !cmd.flags)
1080                 return -EINVAL;
1081
1082         if ((cmd.flags & IB_MR_REREG_TRANS) &&
1083             (!cmd.start || !cmd.hca_va || 0 >= cmd.length ||
1084              (cmd.start & ~PAGE_MASK) != (cmd.hca_va & ~PAGE_MASK)))
1085                         return -EINVAL;
1086
1087         uobj = idr_write_uobj(&ib_uverbs_mr_idr, cmd.mr_handle,
1088                               file->ucontext);
1089
1090         if (!uobj)
1091                 return -EINVAL;
1092
1093         mr = uobj->object;
1094
1095         if (cmd.flags & IB_MR_REREG_ACCESS) {
1096                 ret = ib_check_mr_access(cmd.access_flags);
1097                 if (ret)
1098                         goto put_uobjs;
1099         }
1100
1101         if (cmd.flags & IB_MR_REREG_PD) {
1102                 pd = idr_read_pd(cmd.pd_handle, file->ucontext);
1103                 if (!pd) {
1104                         ret = -EINVAL;
1105                         goto put_uobjs;
1106                 }
1107         }
1108
1109         if (atomic_read(&mr->usecnt)) {
1110                 ret = -EBUSY;
1111                 goto put_uobj_pd;
1112         }
1113
1114         old_pd = mr->pd;
1115         ret = mr->device->rereg_user_mr(mr, cmd.flags, cmd.start,
1116                                         cmd.length, cmd.hca_va,
1117                                         cmd.access_flags, pd, &udata);
1118         if (!ret) {
1119                 if (cmd.flags & IB_MR_REREG_PD) {
1120                         atomic_inc(&pd->usecnt);
1121                         mr->pd = pd;
1122                         atomic_dec(&old_pd->usecnt);
1123                 }
1124         } else {
1125                 goto put_uobj_pd;
1126         }
1127
1128         memset(&resp, 0, sizeof(resp));
1129         resp.lkey      = mr->lkey;
1130         resp.rkey      = mr->rkey;
1131
1132         if (copy_to_user((void __user *)(unsigned long)cmd.response,
1133                          &resp, sizeof(resp)))
1134                 ret = -EFAULT;
1135         else
1136                 ret = in_len;
1137
1138 put_uobj_pd:
1139         if (cmd.flags & IB_MR_REREG_PD)
1140                 put_pd_read(pd);
1141
1142 put_uobjs:
1143
1144         put_uobj_write(mr->uobject);
1145
1146         return ret;
1147 }
1148
1149 ssize_t ib_uverbs_dereg_mr(struct ib_uverbs_file *file,
1150                            struct ib_device *ib_dev,
1151                            const char __user *buf, int in_len,
1152                            int out_len)
1153 {
1154         struct ib_uverbs_dereg_mr cmd;
1155         struct ib_mr             *mr;
1156         struct ib_uobject        *uobj;
1157         int                       ret = -EINVAL;
1158
1159         if (copy_from_user(&cmd, buf, sizeof cmd))
1160                 return -EFAULT;
1161
1162         uobj = idr_write_uobj(&ib_uverbs_mr_idr, cmd.mr_handle, file->ucontext);
1163         if (!uobj)
1164                 return -EINVAL;
1165
1166         mr = uobj->object;
1167
1168         ret = ib_dereg_mr(mr);
1169         if (!ret)
1170                 uobj->live = 0;
1171
1172         put_uobj_write(uobj);
1173
1174         if (ret)
1175                 return ret;
1176
1177         idr_remove_uobj(&ib_uverbs_mr_idr, uobj);
1178
1179         mutex_lock(&file->mutex);
1180         list_del(&uobj->list);
1181         mutex_unlock(&file->mutex);
1182
1183         put_uobj(uobj);
1184
1185         return in_len;
1186 }
1187
1188 ssize_t ib_uverbs_alloc_mw(struct ib_uverbs_file *file,
1189                            struct ib_device *ib_dev,
1190                            const char __user *buf, int in_len,
1191                            int out_len)
1192 {
1193         struct ib_uverbs_alloc_mw      cmd;
1194         struct ib_uverbs_alloc_mw_resp resp;
1195         struct ib_uobject             *uobj;
1196         struct ib_pd                  *pd;
1197         struct ib_mw                  *mw;
1198         int                            ret;
1199
1200         if (out_len < sizeof(resp))
1201                 return -ENOSPC;
1202
1203         if (copy_from_user(&cmd, buf, sizeof(cmd)))
1204                 return -EFAULT;
1205
1206         uobj = kmalloc(sizeof(*uobj), GFP_KERNEL);
1207         if (!uobj)
1208                 return -ENOMEM;
1209
1210         init_uobj(uobj, 0, file->ucontext, &mw_lock_class);
1211         down_write(&uobj->mutex);
1212
1213         pd = idr_read_pd(cmd.pd_handle, file->ucontext);
1214         if (!pd) {
1215                 ret = -EINVAL;
1216                 goto err_free;
1217         }
1218
1219         mw = pd->device->alloc_mw(pd, cmd.mw_type);
1220         if (IS_ERR(mw)) {
1221                 ret = PTR_ERR(mw);
1222                 goto err_put;
1223         }
1224
1225         mw->device  = pd->device;
1226         mw->pd      = pd;
1227         mw->uobject = uobj;
1228         atomic_inc(&pd->usecnt);
1229
1230         uobj->object = mw;
1231         ret = idr_add_uobj(&ib_uverbs_mw_idr, uobj);
1232         if (ret)
1233                 goto err_unalloc;
1234
1235         memset(&resp, 0, sizeof(resp));
1236         resp.rkey      = mw->rkey;
1237         resp.mw_handle = uobj->id;
1238
1239         if (copy_to_user((void __user *)(unsigned long)cmd.response,
1240                          &resp, sizeof(resp))) {
1241                 ret = -EFAULT;
1242                 goto err_copy;
1243         }
1244
1245         put_pd_read(pd);
1246
1247         mutex_lock(&file->mutex);
1248         list_add_tail(&uobj->list, &file->ucontext->mw_list);
1249         mutex_unlock(&file->mutex);
1250
1251         uobj->live = 1;
1252
1253         up_write(&uobj->mutex);
1254
1255         return in_len;
1256
1257 err_copy:
1258         idr_remove_uobj(&ib_uverbs_mw_idr, uobj);
1259
1260 err_unalloc:
1261         ib_dealloc_mw(mw);
1262
1263 err_put:
1264         put_pd_read(pd);
1265
1266 err_free:
1267         put_uobj_write(uobj);
1268         return ret;
1269 }
1270
1271 ssize_t ib_uverbs_dealloc_mw(struct ib_uverbs_file *file,
1272                              struct ib_device *ib_dev,
1273                              const char __user *buf, int in_len,
1274                              int out_len)
1275 {
1276         struct ib_uverbs_dealloc_mw cmd;
1277         struct ib_mw               *mw;
1278         struct ib_uobject          *uobj;
1279         int                         ret = -EINVAL;
1280
1281         if (copy_from_user(&cmd, buf, sizeof(cmd)))
1282                 return -EFAULT;
1283
1284         uobj = idr_write_uobj(&ib_uverbs_mw_idr, cmd.mw_handle, file->ucontext);
1285         if (!uobj)
1286                 return -EINVAL;
1287
1288         mw = uobj->object;
1289
1290         ret = ib_dealloc_mw(mw);
1291         if (!ret)
1292                 uobj->live = 0;
1293
1294         put_uobj_write(uobj);
1295
1296         if (ret)
1297                 return ret;
1298
1299         idr_remove_uobj(&ib_uverbs_mw_idr, uobj);
1300
1301         mutex_lock(&file->mutex);
1302         list_del(&uobj->list);
1303         mutex_unlock(&file->mutex);
1304
1305         put_uobj(uobj);
1306
1307         return in_len;
1308 }
1309
1310 ssize_t ib_uverbs_create_comp_channel(struct ib_uverbs_file *file,
1311                                       struct ib_device *ib_dev,
1312                                       const char __user *buf, int in_len,
1313                                       int out_len)
1314 {
1315         struct ib_uverbs_create_comp_channel       cmd;
1316         struct ib_uverbs_create_comp_channel_resp  resp;
1317         struct file                               *filp;
1318         int ret;
1319
1320         if (out_len < sizeof resp)
1321                 return -ENOSPC;
1322
1323         if (copy_from_user(&cmd, buf, sizeof cmd))
1324                 return -EFAULT;
1325
1326         ret = get_unused_fd_flags(O_CLOEXEC);
1327         if (ret < 0)
1328                 return ret;
1329         resp.fd = ret;
1330
1331         filp = ib_uverbs_alloc_event_file(file, ib_dev, 0);
1332         if (IS_ERR(filp)) {
1333                 put_unused_fd(resp.fd);
1334                 return PTR_ERR(filp);
1335         }
1336
1337         if (copy_to_user((void __user *) (unsigned long) cmd.response,
1338                          &resp, sizeof resp)) {
1339                 put_unused_fd(resp.fd);
1340                 fput(filp);
1341                 return -EFAULT;
1342         }
1343
1344         fd_install(resp.fd, filp);
1345         return in_len;
1346 }
1347
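/*
 * Common helper shared by the legacy create_cq command and the extended
 * (ex) variant.  The @cb callback copies the response back to user space
 * in the layout that the particular command expects.
 */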
1348 static struct ib_ucq_object *create_cq(struct ib_uverbs_file *file,
1349                                         struct ib_device *ib_dev,
1350                                        struct ib_udata *ucore,
1351                                        struct ib_udata *uhw,
1352                                        struct ib_uverbs_ex_create_cq *cmd,
1353                                        size_t cmd_sz,
1354                                        int (*cb)(struct ib_uverbs_file *file,
1355                                                  struct ib_ucq_object *obj,
1356                                                  struct ib_uverbs_ex_create_cq_resp *resp,
1357                                                  struct ib_udata *udata,
1358                                                  void *context),
1359                                        void *context)
1360 {
1361         struct ib_ucq_object           *obj;
1362         struct ib_uverbs_event_file    *ev_file = NULL;
1363         struct ib_cq                   *cq;
1364         int                             ret;
1365         struct ib_uverbs_ex_create_cq_resp resp;
1366         struct ib_cq_init_attr attr = {};
1367
1368         if (cmd->comp_vector >= file->device->num_comp_vectors)
1369                 return ERR_PTR(-EINVAL);
1370
1371         obj = kmalloc(sizeof *obj, GFP_KERNEL);
1372         if (!obj)
1373                 return ERR_PTR(-ENOMEM);
1374
1375         init_uobj(&obj->uobject, cmd->user_handle, file->ucontext, &cq_lock_class);
1376         down_write(&obj->uobject.mutex);
1377
1378         if (cmd->comp_channel >= 0) {
1379                 ev_file = ib_uverbs_lookup_comp_file(cmd->comp_channel);
1380                 if (!ev_file) {
1381                         ret = -EINVAL;
1382                         goto err;
1383                 }
1384         }
1385
1386         obj->uverbs_file           = file;
1387         obj->comp_events_reported  = 0;
1388         obj->async_events_reported = 0;
1389         INIT_LIST_HEAD(&obj->comp_list);
1390         INIT_LIST_HEAD(&obj->async_list);
1391
1392         attr.cqe = cmd->cqe;
1393         attr.comp_vector = cmd->comp_vector;
1394
1395         if (cmd_sz > offsetof(typeof(*cmd), flags) + sizeof(cmd->flags))
1396                 attr.flags = cmd->flags;
1397
1398         cq = ib_dev->create_cq(ib_dev, &attr,
1399                                file->ucontext, uhw);
1400         if (IS_ERR(cq)) {
1401                 ret = PTR_ERR(cq);
1402                 goto err_file;
1403         }
1404
1405         cq->device        = ib_dev;
1406         cq->uobject       = &obj->uobject;
1407         cq->comp_handler  = ib_uverbs_comp_handler;
1408         cq->event_handler = ib_uverbs_cq_event_handler;
1409         cq->cq_context    = ev_file;
1410         atomic_set(&cq->usecnt, 0);
1411
1412         obj->uobject.object = cq;
1413         ret = idr_add_uobj(&ib_uverbs_cq_idr, &obj->uobject);
1414         if (ret)
1415                 goto err_free;
1416
1417         memset(&resp, 0, sizeof resp);
1418         resp.base.cq_handle = obj->uobject.id;
1419         resp.base.cqe       = cq->cqe;
1420
1421         resp.response_length = offsetof(typeof(resp), response_length) +
1422                 sizeof(resp.response_length);
1423
1424         ret = cb(file, obj, &resp, ucore, context);
1425         if (ret)
1426                 goto err_cb;
1427
1428         mutex_lock(&file->mutex);
1429         list_add_tail(&obj->uobject.list, &file->ucontext->cq_list);
1430         mutex_unlock(&file->mutex);
1431
1432         obj->uobject.live = 1;
1433
1434         up_write(&obj->uobject.mutex);
1435
1436         return obj;
1437
1438 err_cb:
1439         idr_remove_uobj(&ib_uverbs_cq_idr, &obj->uobject);
1440
1441 err_free:
1442         ib_destroy_cq(cq);
1443
1444 err_file:
1445         if (ev_file)
1446                 ib_uverbs_release_ucq(file, ev_file, obj);
1447
1448 err:
1449         put_uobj_write(&obj->uobject);
1450
1451         return ERR_PTR(ret);
1452 }
1453
1454 static int ib_uverbs_create_cq_cb(struct ib_uverbs_file *file,
1455                                   struct ib_ucq_object *obj,
1456                                   struct ib_uverbs_ex_create_cq_resp *resp,
1457                                   struct ib_udata *ucore, void *context)
1458 {
1459         if (ib_copy_to_udata(ucore, &resp->base, sizeof(resp->base)))
1460                 return -EFAULT;
1461
1462         return 0;
1463 }
1464
1465 ssize_t ib_uverbs_create_cq(struct ib_uverbs_file *file,
1466                             struct ib_device *ib_dev,
1467                             const char __user *buf, int in_len,
1468                             int out_len)
1469 {
1470         struct ib_uverbs_create_cq      cmd;
1471         struct ib_uverbs_ex_create_cq   cmd_ex;
1472         struct ib_uverbs_create_cq_resp resp;
1473         struct ib_udata                 ucore;
1474         struct ib_udata                 uhw;
1475         struct ib_ucq_object           *obj;
1476
1477         if (out_len < sizeof(resp))
1478                 return -ENOSPC;
1479
1480         if (copy_from_user(&cmd, buf, sizeof(cmd)))
1481                 return -EFAULT;
1482
1483         INIT_UDATA(&ucore, buf, (unsigned long)cmd.response, sizeof(cmd), sizeof(resp));
1484
1485         INIT_UDATA(&uhw, buf + sizeof(cmd),
1486                    (unsigned long)cmd.response + sizeof(resp),
1487                    in_len - sizeof(cmd), out_len - sizeof(resp));
1488
1489         memset(&cmd_ex, 0, sizeof(cmd_ex));
1490         cmd_ex.user_handle = cmd.user_handle;
1491         cmd_ex.cqe = cmd.cqe;
1492         cmd_ex.comp_vector = cmd.comp_vector;
1493         cmd_ex.comp_channel = cmd.comp_channel;
1494
1495         obj = create_cq(file, ib_dev, &ucore, &uhw, &cmd_ex,
1496                         offsetof(typeof(cmd_ex), comp_channel) +
1497                         sizeof(cmd.comp_channel), ib_uverbs_create_cq_cb,
1498                         NULL);
1499
1500         if (IS_ERR(obj))
1501                 return PTR_ERR(obj);
1502
1503         return in_len;
1504 }
1505
1506 static int ib_uverbs_ex_create_cq_cb(struct ib_uverbs_file *file,
1507                                      struct ib_ucq_object *obj,
1508                                      struct ib_uverbs_ex_create_cq_resp *resp,
1509                                      struct ib_udata *ucore, void *context)
1510 {
1511         if (ib_copy_to_udata(ucore, resp, resp->response_length))
1512                 return -EFAULT;
1513
1514         return 0;
1515 }
1516
1517 int ib_uverbs_ex_create_cq(struct ib_uverbs_file *file,
1518                          struct ib_device *ib_dev,
1519                            struct ib_udata *ucore,
1520                            struct ib_udata *uhw)
1521 {
1522         struct ib_uverbs_ex_create_cq_resp resp;
1523         struct ib_uverbs_ex_create_cq  cmd;
1524         struct ib_ucq_object           *obj;
1525         int err;
1526
1527         if (ucore->inlen < sizeof(cmd))
1528                 return -EINVAL;
1529
1530         err = ib_copy_from_udata(&cmd, ucore, sizeof(cmd));
1531         if (err)
1532                 return err;
1533
1534         if (cmd.comp_mask)
1535                 return -EINVAL;
1536
1537         if (cmd.reserved)
1538                 return -EINVAL;
1539
1540         if (ucore->outlen < (offsetof(typeof(resp), response_length) +
1541                              sizeof(resp.response_length)))
1542                 return -ENOSPC;
1543
1544         obj = create_cq(file, ib_dev, ucore, uhw, &cmd,
1545                         min(ucore->inlen, sizeof(cmd)),
1546                         ib_uverbs_ex_create_cq_cb, NULL);
1547
1548         if (IS_ERR(obj))
1549                 return PTR_ERR(obj);
1550
1551         return 0;
1552 }
1553
1554 ssize_t ib_uverbs_resize_cq(struct ib_uverbs_file *file,
1555                             struct ib_device *ib_dev,
1556                             const char __user *buf, int in_len,
1557                             int out_len)
1558 {
1559         struct ib_uverbs_resize_cq      cmd;
1560         struct ib_uverbs_resize_cq_resp resp;
1561         struct ib_udata                 udata;
1562         struct ib_cq                    *cq;
1563         int                             ret = -EINVAL;
1564
1565         if (copy_from_user(&cmd, buf, sizeof cmd))
1566                 return -EFAULT;
1567
1568         INIT_UDATA(&udata, buf + sizeof cmd,
1569                    (unsigned long) cmd.response + sizeof resp,
1570                    in_len - sizeof cmd, out_len - sizeof resp);
1571
1572         cq = idr_read_cq(cmd.cq_handle, file->ucontext, 0);
1573         if (!cq)
1574                 return -EINVAL;
1575
1576         ret = cq->device->resize_cq(cq, cmd.cqe, &udata);
1577         if (ret)
1578                 goto out;
1579
1580         resp.cqe = cq->cqe;
1581
1582         if (copy_to_user((void __user *) (unsigned long) cmd.response,
1583                          &resp, sizeof resp.cqe))
1584                 ret = -EFAULT;
1585
1586 out:
1587         put_cq_read(cq);
1588
1589         return ret ? ret : in_len;
1590 }
1591
1592 static int copy_wc_to_user(void __user *dest, struct ib_wc *wc)
1593 {
1594         struct ib_uverbs_wc tmp;
1595
1596         tmp.wr_id               = wc->wr_id;
1597         tmp.status              = wc->status;
1598         tmp.opcode              = wc->opcode;
1599         tmp.vendor_err          = wc->vendor_err;
1600         tmp.byte_len            = wc->byte_len;
1601         tmp.ex.imm_data         = (__u32 __force) wc->ex.imm_data;
1602         tmp.qp_num              = wc->qp->qp_num;
1603         tmp.src_qp              = wc->src_qp;
1604         tmp.wc_flags            = wc->wc_flags;
1605         tmp.pkey_index          = wc->pkey_index;
1606         tmp.slid                = wc->slid;
1607         tmp.sl                  = wc->sl;
1608         tmp.dlid_path_bits      = wc->dlid_path_bits;
1609         tmp.port_num            = wc->port_num;
1610         tmp.reserved            = 0;
1611
1612         if (copy_to_user(dest, &tmp, sizeof tmp))
1613                 return -EFAULT;
1614
1615         return 0;
1616 }
1617
1618 ssize_t ib_uverbs_poll_cq(struct ib_uverbs_file *file,
1619                           struct ib_device *ib_dev,
1620                           const char __user *buf, int in_len,
1621                           int out_len)
1622 {
1623         struct ib_uverbs_poll_cq       cmd;
1624         struct ib_uverbs_poll_cq_resp  resp;
1625         u8 __user                     *header_ptr;
1626         u8 __user                     *data_ptr;
1627         struct ib_cq                  *cq;
1628         struct ib_wc                   wc;
1629         int                            ret;
1630
1631         if (copy_from_user(&cmd, buf, sizeof cmd))
1632                 return -EFAULT;
1633
1634         cq = idr_read_cq(cmd.cq_handle, file->ucontext, 0);
1635         if (!cq)
1636                 return -EINVAL;
1637
1638         /* we copy a struct ib_uverbs_poll_cq_resp to user space */
1639         header_ptr = (void __user *)(unsigned long) cmd.response;
1640         data_ptr = header_ptr + sizeof resp;
1641
1642         memset(&resp, 0, sizeof resp);
1643         while (resp.count < cmd.ne) {
1644                 ret = ib_poll_cq(cq, 1, &wc);
1645                 if (ret < 0)
1646                         goto out_put;
1647                 if (!ret)
1648                         break;
1649
1650                 ret = copy_wc_to_user(data_ptr, &wc);
1651                 if (ret)
1652                         goto out_put;
1653
1654                 data_ptr += sizeof(struct ib_uverbs_wc);
1655                 ++resp.count;
1656         }
1657
1658         if (copy_to_user(header_ptr, &resp, sizeof resp)) {
1659                 ret = -EFAULT;
1660                 goto out_put;
1661         }
1662
1663         ret = in_len;
1664
1665 out_put:
1666         put_cq_read(cq);
1667         return ret;
1668 }
1669
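/*
 * REQ_NOTIFY_CQ handler: re-arm the CQ for completion notification,
 * either for solicited completions only or for the next completion,
 * depending on cmd.solicited_only.
 */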
1670 ssize_t ib_uverbs_req_notify_cq(struct ib_uverbs_file *file,
1671                                 struct ib_device *ib_dev,
1672                                 const char __user *buf, int in_len,
1673                                 int out_len)
1674 {
1675         struct ib_uverbs_req_notify_cq cmd;
1676         struct ib_cq                  *cq;
1677
1678         if (copy_from_user(&cmd, buf, sizeof cmd))
1679                 return -EFAULT;
1680
1681         cq = idr_read_cq(cmd.cq_handle, file->ucontext, 0);
1682         if (!cq)
1683                 return -EINVAL;
1684
1685         ib_req_notify_cq(cq, cmd.solicited_only ?
1686                          IB_CQ_SOLICITED : IB_CQ_NEXT_COMP);
1687
1688         put_cq_read(cq);
1689
1690         return in_len;
1691 }
1692
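/*
 * DESTROY_CQ handler: destroy the CQ, drop it from the idr and the
 * per-file list, release any pending events, and report how many
 * completion and async events had already been delivered for it.
 */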
1693 ssize_t ib_uverbs_destroy_cq(struct ib_uverbs_file *file,
1694                              struct ib_device *ib_dev,
1695                              const char __user *buf, int in_len,
1696                              int out_len)
1697 {
1698         struct ib_uverbs_destroy_cq      cmd;
1699         struct ib_uverbs_destroy_cq_resp resp;
1700         struct ib_uobject               *uobj;
1701         struct ib_cq                    *cq;
1702         struct ib_ucq_object            *obj;
1703         struct ib_uverbs_event_file     *ev_file;
1704         int                              ret = -EINVAL;
1705
1706         if (copy_from_user(&cmd, buf, sizeof cmd))
1707                 return -EFAULT;
1708
1709         uobj = idr_write_uobj(&ib_uverbs_cq_idr, cmd.cq_handle, file->ucontext);
1710         if (!uobj)
1711                 return -EINVAL;
1712         cq      = uobj->object;
1713         ev_file = cq->cq_context;
1714         obj     = container_of(cq->uobject, struct ib_ucq_object, uobject);
1715
1716         ret = ib_destroy_cq(cq);
1717         if (!ret)
1718                 uobj->live = 0;
1719
1720         put_uobj_write(uobj);
1721
1722         if (ret)
1723                 return ret;
1724
1725         idr_remove_uobj(&ib_uverbs_cq_idr, uobj);
1726
1727         mutex_lock(&file->mutex);
1728         list_del(&uobj->list);
1729         mutex_unlock(&file->mutex);
1730
1731         ib_uverbs_release_ucq(file, ev_file, obj);
1732
1733         memset(&resp, 0, sizeof resp);
1734         resp.comp_events_reported  = obj->comp_events_reported;
1735         resp.async_events_reported = obj->async_events_reported;
1736
1737         put_uobj(uobj);
1738
1739         if (copy_to_user((void __user *) (unsigned long) cmd.response,
1740                          &resp, sizeof resp))
1741                 return -EFAULT;
1742
1743         return in_len;
1744 }
1745
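/*
 * Common QP creation path shared by the legacy CREATE_QP command and
 * the extended EX_CREATE_QP command.  cmd_sz says how much of
 * struct ib_uverbs_ex_create_qp the caller actually provided, so
 * fields beyond the legacy layout (such as create_flags) are only
 * honoured when present, and any trailing bytes must be zero.  The
 * cb callback writes the response in the format expected by the
 * invoking command.
 */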
1746 static int create_qp(struct ib_uverbs_file *file,
1747                      struct ib_udata *ucore,
1748                      struct ib_udata *uhw,
1749                      struct ib_uverbs_ex_create_qp *cmd,
1750                      size_t cmd_sz,
1751                      int (*cb)(struct ib_uverbs_file *file,
1752                                struct ib_uverbs_ex_create_qp_resp *resp,
1753                                struct ib_udata *udata),
1754                      void *context)
1755 {
1756         struct ib_uqp_object            *obj;
1757         struct ib_device                *device;
1758         struct ib_pd                    *pd = NULL;
1759         struct ib_xrcd                  *xrcd = NULL;
1760         struct ib_uobject               *uninitialized_var(xrcd_uobj);
1761         struct ib_cq                    *scq = NULL, *rcq = NULL;
1762         struct ib_srq                   *srq = NULL;
1763         struct ib_qp                    *qp;
1764         char                            *buf;
1765         struct ib_qp_init_attr          attr;
1766         struct ib_uverbs_ex_create_qp_resp resp;
1767         int                             ret;
1768
1769         if (cmd->qp_type == IB_QPT_RAW_PACKET && !capable(CAP_NET_RAW))
1770                 return -EPERM;
1771
1772         obj = kzalloc(sizeof *obj, GFP_KERNEL);
1773         if (!obj)
1774                 return -ENOMEM;
1775
1776         init_uobj(&obj->uevent.uobject, cmd->user_handle, file->ucontext,
1777                   &qp_lock_class);
1778         down_write(&obj->uevent.uobject.mutex);
1779
1780         if (cmd->qp_type == IB_QPT_XRC_TGT) {
1781                 xrcd = idr_read_xrcd(cmd->pd_handle, file->ucontext,
1782                                      &xrcd_uobj);
1783                 if (!xrcd) {
1784                         ret = -EINVAL;
1785                         goto err_put;
1786                 }
1787                 device = xrcd->device;
1788         } else {
1789                 if (cmd->qp_type == IB_QPT_XRC_INI) {
1790                         cmd->max_recv_wr = 0;
1791                         cmd->max_recv_sge = 0;
1792                 } else {
1793                         if (cmd->is_srq) {
1794                                 srq = idr_read_srq(cmd->srq_handle,
1795                                                    file->ucontext);
1796                                 if (!srq || srq->srq_type != IB_SRQT_BASIC) {
1797                                         ret = -EINVAL;
1798                                         goto err_put;
1799                                 }
1800                         }
1801
1802                         if (cmd->recv_cq_handle != cmd->send_cq_handle) {
1803                                 rcq = idr_read_cq(cmd->recv_cq_handle,
1804                                                   file->ucontext, 0);
1805                                 if (!rcq) {
1806                                         ret = -EINVAL;
1807                                         goto err_put;
1808                                 }
1809                         }
1810                 }
1811
1812                 scq = idr_read_cq(cmd->send_cq_handle, file->ucontext, !!rcq);
1813                 rcq = rcq ?: scq;
1814                 pd  = idr_read_pd(cmd->pd_handle, file->ucontext);
1815                 if (!pd || !scq) {
1816                         ret = -EINVAL;
1817                         goto err_put;
1818                 }
1819
1820                 device = pd->device;
1821         }
1822
1823         attr.event_handler = ib_uverbs_qp_event_handler;
1824         attr.qp_context    = file;
1825         attr.send_cq       = scq;
1826         attr.recv_cq       = rcq;
1827         attr.srq           = srq;
1828         attr.xrcd          = xrcd;
1829         attr.sq_sig_type   = cmd->sq_sig_all ? IB_SIGNAL_ALL_WR :
1830                                               IB_SIGNAL_REQ_WR;
1831         attr.qp_type       = cmd->qp_type;
1832         attr.create_flags  = 0;
1833
1834         attr.cap.max_send_wr     = cmd->max_send_wr;
1835         attr.cap.max_recv_wr     = cmd->max_recv_wr;
1836         attr.cap.max_send_sge    = cmd->max_send_sge;
1837         attr.cap.max_recv_sge    = cmd->max_recv_sge;
1838         attr.cap.max_inline_data = cmd->max_inline_data;
1839
1840         obj->uevent.events_reported     = 0;
1841         INIT_LIST_HEAD(&obj->uevent.event_list);
1842         INIT_LIST_HEAD(&obj->mcast_list);
1843
1844         if (cmd_sz >= offsetof(typeof(*cmd), create_flags) +
1845                       sizeof(cmd->create_flags))
1846                 attr.create_flags = cmd->create_flags;
1847
1848         if (attr.create_flags & ~IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK) {
1849                 ret = -EINVAL;
1850                 goto err_put;
1851         }
1852
1853         buf = (void *)cmd + sizeof(*cmd);
1854         if (cmd_sz > sizeof(*cmd))
1855                 if (!(buf[0] == 0 && !memcmp(buf, buf + 1,
1856                                              cmd_sz - sizeof(*cmd) - 1))) {
1857                         ret = -EINVAL;
1858                         goto err_put;
1859                 }
1860
1861         if (cmd->qp_type == IB_QPT_XRC_TGT)
1862                 qp = ib_create_qp(pd, &attr);
1863         else
1864                 qp = device->create_qp(pd, &attr, uhw);
1865
1866         if (IS_ERR(qp)) {
1867                 ret = PTR_ERR(qp);
1868                 goto err_put;
1869         }
1870
1871         if (cmd->qp_type != IB_QPT_XRC_TGT) {
1872                 qp->real_qp       = qp;
1873                 qp->device        = device;
1874                 qp->pd            = pd;
1875                 qp->send_cq       = attr.send_cq;
1876                 qp->recv_cq       = attr.recv_cq;
1877                 qp->srq           = attr.srq;
1878                 qp->event_handler = attr.event_handler;
1879                 qp->qp_context    = attr.qp_context;
1880                 qp->qp_type       = attr.qp_type;
1881                 atomic_set(&qp->usecnt, 0);
1882                 atomic_inc(&pd->usecnt);
1883                 atomic_inc(&attr.send_cq->usecnt);
1884                 if (attr.recv_cq)
1885                         atomic_inc(&attr.recv_cq->usecnt);
1886                 if (attr.srq)
1887                         atomic_inc(&attr.srq->usecnt);
1888         }
1889         qp->uobject = &obj->uevent.uobject;
1890
1891         obj->uevent.uobject.object = qp;
1892         ret = idr_add_uobj(&ib_uverbs_qp_idr, &obj->uevent.uobject);
1893         if (ret)
1894                 goto err_destroy;
1895
1896         memset(&resp, 0, sizeof resp);
1897         resp.base.qpn             = qp->qp_num;
1898         resp.base.qp_handle       = obj->uevent.uobject.id;
1899         resp.base.max_recv_sge    = attr.cap.max_recv_sge;
1900         resp.base.max_send_sge    = attr.cap.max_send_sge;
1901         resp.base.max_recv_wr     = attr.cap.max_recv_wr;
1902         resp.base.max_send_wr     = attr.cap.max_send_wr;
1903         resp.base.max_inline_data = attr.cap.max_inline_data;
1904
1905         resp.response_length = offsetof(typeof(resp), response_length) +
1906                                sizeof(resp.response_length);
1907
1908         ret = cb(file, &resp, ucore);
1909         if (ret)
1910                 goto err_cb;
1911
1912         if (xrcd) {
1913                 obj->uxrcd = container_of(xrcd_uobj, struct ib_uxrcd_object,
1914                                           uobject);
1915                 atomic_inc(&obj->uxrcd->refcnt);
1916                 put_xrcd_read(xrcd_uobj);
1917         }
1918
1919         if (pd)
1920                 put_pd_read(pd);
1921         if (scq)
1922                 put_cq_read(scq);
1923         if (rcq && rcq != scq)
1924                 put_cq_read(rcq);
1925         if (srq)
1926                 put_srq_read(srq);
1927
1928         mutex_lock(&file->mutex);
1929         list_add_tail(&obj->uevent.uobject.list, &file->ucontext->qp_list);
1930         mutex_unlock(&file->mutex);
1931
1932         obj->uevent.uobject.live = 1;
1933
1934         up_write(&obj->uevent.uobject.mutex);
1935
1936         return 0;
1937 err_cb:
1938         idr_remove_uobj(&ib_uverbs_qp_idr, &obj->uevent.uobject);
1939
1940 err_destroy:
1941         ib_destroy_qp(qp);
1942
1943 err_put:
1944         if (xrcd)
1945                 put_xrcd_read(xrcd_uobj);
1946         if (pd)
1947                 put_pd_read(pd);
1948         if (scq)
1949                 put_cq_read(scq);
1950         if (rcq && rcq != scq)
1951                 put_cq_read(rcq);
1952         if (srq)
1953                 put_srq_read(srq);
1954
1955         put_uobj_write(&obj->uevent.uobject);
1956         return ret;
1957 }
1958
1959 static int ib_uverbs_create_qp_cb(struct ib_uverbs_file *file,
1960                                   struct ib_uverbs_ex_create_qp_resp *resp,
1961                                   struct ib_udata *ucore)
1962 {
1963         if (ib_copy_to_udata(ucore, &resp->base, sizeof(resp->base)))
1964                 return -EFAULT;
1965
1966         return 0;
1967 }
1968
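/*
 * Legacy CREATE_QP handler: translate the old command layout into
 * struct ib_uverbs_ex_create_qp and reuse create_qp(), replying with
 * the legacy response via ib_uverbs_create_qp_cb().
 */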
1969 ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file,
1970                             struct ib_device *ib_dev,
1971                             const char __user *buf, int in_len,
1972                             int out_len)
1973 {
1974         struct ib_uverbs_create_qp      cmd;
1975         struct ib_uverbs_ex_create_qp   cmd_ex;
1976         struct ib_udata                 ucore;
1977         struct ib_udata                 uhw;
1978         ssize_t resp_size = sizeof(struct ib_uverbs_create_qp_resp);
1979         int                             err;
1980
1981         if (out_len < resp_size)
1982                 return -ENOSPC;
1983
1984         if (copy_from_user(&cmd, buf, sizeof(cmd)))
1985                 return -EFAULT;
1986
1987         INIT_UDATA(&ucore, buf, (unsigned long)cmd.response, sizeof(cmd),
1988                    resp_size);
1989         INIT_UDATA(&uhw, buf + sizeof(cmd),
1990                    (unsigned long)cmd.response + resp_size,
1991                    in_len - sizeof(cmd), out_len - resp_size);
1992
1993         memset(&cmd_ex, 0, sizeof(cmd_ex));
1994         cmd_ex.user_handle = cmd.user_handle;
1995         cmd_ex.pd_handle = cmd.pd_handle;
1996         cmd_ex.send_cq_handle = cmd.send_cq_handle;
1997         cmd_ex.recv_cq_handle = cmd.recv_cq_handle;
1998         cmd_ex.srq_handle = cmd.srq_handle;
1999         cmd_ex.max_send_wr = cmd.max_send_wr;
2000         cmd_ex.max_recv_wr = cmd.max_recv_wr;
2001         cmd_ex.max_send_sge = cmd.max_send_sge;
2002         cmd_ex.max_recv_sge = cmd.max_recv_sge;
2003         cmd_ex.max_inline_data = cmd.max_inline_data;
2004         cmd_ex.sq_sig_all = cmd.sq_sig_all;
2005         cmd_ex.qp_type = cmd.qp_type;
2006         cmd_ex.is_srq = cmd.is_srq;
2007
2008         err = create_qp(file, &ucore, &uhw, &cmd_ex,
2009                         offsetof(typeof(cmd_ex), is_srq) +
2010                         sizeof(cmd.is_srq), ib_uverbs_create_qp_cb,
2011                         NULL);
2012
2013         if (err)
2014                 return err;
2015
2016         return in_len;
2017 }
2018
2019 static int ib_uverbs_ex_create_qp_cb(struct ib_uverbs_file *file,
2020                                      struct ib_uverbs_ex_create_qp_resp *resp,
2021                                      struct ib_udata *ucore)
2022 {
2023         if (ib_copy_to_udata(ucore, resp, resp->response_length))
2024                 return -EFAULT;
2025
2026         return 0;
2027 }
2028
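/*
 * Extended CREATE_QP handler: same create_qp() core, but the command
 * and response travel in ucore/uhw and the full extended response,
 * including response_length, is copied back.
 */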
2029 int ib_uverbs_ex_create_qp(struct ib_uverbs_file *file,
2030                            struct ib_device *ib_dev,
2031                            struct ib_udata *ucore,
2032                            struct ib_udata *uhw)
2033 {
2034         struct ib_uverbs_ex_create_qp_resp resp;
2035         struct ib_uverbs_ex_create_qp cmd = {0};
2036         int err;
2037
2038         if (ucore->inlen < (offsetof(typeof(cmd), comp_mask) +
2039                             sizeof(cmd.comp_mask)))
2040                 return -EINVAL;
2041
2042         err = ib_copy_from_udata(&cmd, ucore, min(sizeof(cmd), ucore->inlen));
2043         if (err)
2044                 return err;
2045
2046         if (cmd.comp_mask)
2047                 return -EINVAL;
2048
2049         if (cmd.reserved)
2050                 return -EINVAL;
2051
2052         if (ucore->outlen < (offsetof(typeof(resp), response_length) +
2053                              sizeof(resp.response_length)))
2054                 return -ENOSPC;
2055
2056         err = create_qp(file, ucore, uhw, &cmd,
2057                         min(ucore->inlen, sizeof(cmd)),
2058                         ib_uverbs_ex_create_qp_cb, NULL);
2059
2060         if (err)
2061                 return err;
2062
2063         return 0;
2064 }
2065
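/*
 * OPEN_QP handler: rather than creating a new QP, attach to an
 * existing XRC target QP identified by its QP number within the given
 * XRCD and install a uobject for it in this process.
 */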
2066 ssize_t ib_uverbs_open_qp(struct ib_uverbs_file *file,
2067                           struct ib_device *ib_dev,
2068                           const char __user *buf, int in_len, int out_len)
2069 {
2070         struct ib_uverbs_open_qp        cmd;
2071         struct ib_uverbs_create_qp_resp resp;
2072         struct ib_udata                 udata;
2073         struct ib_uqp_object           *obj;
2074         struct ib_xrcd                 *xrcd;
2075         struct ib_uobject              *uninitialized_var(xrcd_uobj);
2076         struct ib_qp                   *qp;
2077         struct ib_qp_open_attr          attr;
2078         int ret;
2079
2080         if (out_len < sizeof resp)
2081                 return -ENOSPC;
2082
2083         if (copy_from_user(&cmd, buf, sizeof cmd))
2084                 return -EFAULT;
2085
2086         INIT_UDATA(&udata, buf + sizeof cmd,
2087                    (unsigned long) cmd.response + sizeof resp,
2088                    in_len - sizeof cmd, out_len - sizeof resp);
2089
2090         obj = kmalloc(sizeof *obj, GFP_KERNEL);
2091         if (!obj)
2092                 return -ENOMEM;
2093
2094         init_uobj(&obj->uevent.uobject, cmd.user_handle, file->ucontext, &qp_lock_class);
2095         down_write(&obj->uevent.uobject.mutex);
2096
2097         xrcd = idr_read_xrcd(cmd.pd_handle, file->ucontext, &xrcd_uobj);
2098         if (!xrcd) {
2099                 ret = -EINVAL;
2100                 goto err_put;
2101         }
2102
2103         attr.event_handler = ib_uverbs_qp_event_handler;
2104         attr.qp_context    = file;
2105         attr.qp_num        = cmd.qpn;
2106         attr.qp_type       = cmd.qp_type;
2107
2108         obj->uevent.events_reported = 0;
2109         INIT_LIST_HEAD(&obj->uevent.event_list);
2110         INIT_LIST_HEAD(&obj->mcast_list);
2111
2112         qp = ib_open_qp(xrcd, &attr);
2113         if (IS_ERR(qp)) {
2114                 ret = PTR_ERR(qp);
2115                 goto err_put;
2116         }
2117
2118         qp->uobject = &obj->uevent.uobject;
2119
2120         obj->uevent.uobject.object = qp;
2121         ret = idr_add_uobj(&ib_uverbs_qp_idr, &obj->uevent.uobject);
2122         if (ret)
2123                 goto err_destroy;
2124
2125         memset(&resp, 0, sizeof resp);
2126         resp.qpn       = qp->qp_num;
2127         resp.qp_handle = obj->uevent.uobject.id;
2128
2129         if (copy_to_user((void __user *) (unsigned long) cmd.response,
2130                          &resp, sizeof resp)) {
2131                 ret = -EFAULT;
2132                 goto err_remove;
2133         }
2134
2135         obj->uxrcd = container_of(xrcd_uobj, struct ib_uxrcd_object, uobject);
2136         atomic_inc(&obj->uxrcd->refcnt);
2137         put_xrcd_read(xrcd_uobj);
2138
2139         mutex_lock(&file->mutex);
2140         list_add_tail(&obj->uevent.uobject.list, &file->ucontext->qp_list);
2141         mutex_unlock(&file->mutex);
2142
2143         obj->uevent.uobject.live = 1;
2144
2145         up_write(&obj->uevent.uobject.mutex);
2146
2147         return in_len;
2148
2149 err_remove:
2150         idr_remove_uobj(&ib_uverbs_qp_idr, &obj->uevent.uobject);
2151
2152 err_destroy:
2153         ib_destroy_qp(qp);
2154
2155 err_put:
2156         put_xrcd_read(xrcd_uobj);
2157         put_uobj_write(&obj->uevent.uobject);
2158         return ret;
2159 }
2160
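/*
 * QUERY_QP handler: call ib_query_qp() and flatten the returned
 * ib_qp_attr / ib_qp_init_attr (including the primary and alternate
 * path address vectors) into the fixed userspace response layout.
 */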
2161 ssize_t ib_uverbs_query_qp(struct ib_uverbs_file *file,
2162                            struct ib_device *ib_dev,
2163                            const char __user *buf, int in_len,
2164                            int out_len)
2165 {
2166         struct ib_uverbs_query_qp      cmd;
2167         struct ib_uverbs_query_qp_resp resp;
2168         struct ib_qp                   *qp;
2169         struct ib_qp_attr              *attr;
2170         struct ib_qp_init_attr         *init_attr;
2171         int                            ret;
2172
2173         if (copy_from_user(&cmd, buf, sizeof cmd))
2174                 return -EFAULT;
2175
2176         attr      = kmalloc(sizeof *attr, GFP_KERNEL);
2177         init_attr = kmalloc(sizeof *init_attr, GFP_KERNEL);
2178         if (!attr || !init_attr) {
2179                 ret = -ENOMEM;
2180                 goto out;
2181         }
2182
2183         qp = idr_read_qp(cmd.qp_handle, file->ucontext);
2184         if (!qp) {
2185                 ret = -EINVAL;
2186                 goto out;
2187         }
2188
2189         ret = ib_query_qp(qp, attr, cmd.attr_mask, init_attr);
2190
2191         put_qp_read(qp);
2192
2193         if (ret)
2194                 goto out;
2195
2196         memset(&resp, 0, sizeof resp);
2197
2198         resp.qp_state               = attr->qp_state;
2199         resp.cur_qp_state           = attr->cur_qp_state;
2200         resp.path_mtu               = attr->path_mtu;
2201         resp.path_mig_state         = attr->path_mig_state;
2202         resp.qkey                   = attr->qkey;
2203         resp.rq_psn                 = attr->rq_psn;
2204         resp.sq_psn                 = attr->sq_psn;
2205         resp.dest_qp_num            = attr->dest_qp_num;
2206         resp.qp_access_flags        = attr->qp_access_flags;
2207         resp.pkey_index             = attr->pkey_index;
2208         resp.alt_pkey_index         = attr->alt_pkey_index;
2209         resp.sq_draining            = attr->sq_draining;
2210         resp.max_rd_atomic          = attr->max_rd_atomic;
2211         resp.max_dest_rd_atomic     = attr->max_dest_rd_atomic;
2212         resp.min_rnr_timer          = attr->min_rnr_timer;
2213         resp.port_num               = attr->port_num;
2214         resp.timeout                = attr->timeout;
2215         resp.retry_cnt              = attr->retry_cnt;
2216         resp.rnr_retry              = attr->rnr_retry;
2217         resp.alt_port_num           = attr->alt_port_num;
2218         resp.alt_timeout            = attr->alt_timeout;
2219
2220         memcpy(resp.dest.dgid, attr->ah_attr.grh.dgid.raw, 16);
2221         resp.dest.flow_label        = attr->ah_attr.grh.flow_label;
2222         resp.dest.sgid_index        = attr->ah_attr.grh.sgid_index;
2223         resp.dest.hop_limit         = attr->ah_attr.grh.hop_limit;
2224         resp.dest.traffic_class     = attr->ah_attr.grh.traffic_class;
2225         resp.dest.dlid              = attr->ah_attr.dlid;
2226         resp.dest.sl                = attr->ah_attr.sl;
2227         resp.dest.src_path_bits     = attr->ah_attr.src_path_bits;
2228         resp.dest.static_rate       = attr->ah_attr.static_rate;
2229         resp.dest.is_global         = !!(attr->ah_attr.ah_flags & IB_AH_GRH);
2230         resp.dest.port_num          = attr->ah_attr.port_num;
2231
2232         memcpy(resp.alt_dest.dgid, attr->alt_ah_attr.grh.dgid.raw, 16);
2233         resp.alt_dest.flow_label    = attr->alt_ah_attr.grh.flow_label;
2234         resp.alt_dest.sgid_index    = attr->alt_ah_attr.grh.sgid_index;
2235         resp.alt_dest.hop_limit     = attr->alt_ah_attr.grh.hop_limit;
2236         resp.alt_dest.traffic_class = attr->alt_ah_attr.grh.traffic_class;
2237         resp.alt_dest.dlid          = attr->alt_ah_attr.dlid;
2238         resp.alt_dest.sl            = attr->alt_ah_attr.sl;
2239         resp.alt_dest.src_path_bits = attr->alt_ah_attr.src_path_bits;
2240         resp.alt_dest.static_rate   = attr->alt_ah_attr.static_rate;
2241         resp.alt_dest.is_global     = !!(attr->alt_ah_attr.ah_flags & IB_AH_GRH);
2242         resp.alt_dest.port_num      = attr->alt_ah_attr.port_num;
2243
2244         resp.max_send_wr            = init_attr->cap.max_send_wr;
2245         resp.max_recv_wr            = init_attr->cap.max_recv_wr;
2246         resp.max_send_sge           = init_attr->cap.max_send_sge;
2247         resp.max_recv_sge           = init_attr->cap.max_recv_sge;
2248         resp.max_inline_data        = init_attr->cap.max_inline_data;
2249         resp.sq_sig_all             = init_attr->sq_sig_type == IB_SIGNAL_ALL_WR;
2250
2251         if (copy_to_user((void __user *) (unsigned long) cmd.response,
2252                          &resp, sizeof resp))
2253                 ret = -EFAULT;
2254
2255 out:
2256         kfree(attr);
2257         kfree(init_attr);
2258
2259         return ret ? ret : in_len;
2260 }
2261
2262 /* Remove ignored fields set in the attribute mask */
2263 static int modify_qp_mask(enum ib_qp_type qp_type, int mask)
2264 {
2265         switch (qp_type) {
2266         case IB_QPT_XRC_INI:
2267                 return mask & ~(IB_QP_MAX_DEST_RD_ATOMIC | IB_QP_MIN_RNR_TIMER);
2268         case IB_QPT_XRC_TGT:
2269                 return mask & ~(IB_QP_MAX_QP_RD_ATOMIC | IB_QP_RETRY_CNT |
2270                                 IB_QP_RNR_RETRY);
2271         default:
2272                 return mask;
2273         }
2274 }
2275
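/*
 * MODIFY_QP handler: rebuild an ib_qp_attr from the command, mask off
 * attributes that do not apply to the QP type (see modify_qp_mask()
 * above) and hand the transition to the driver.  QPs that are not the
 * real QP (for example ones obtained via OPEN_QP) are modified through
 * ib_modify_qp() instead of the driver method directly.
 */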
2276 ssize_t ib_uverbs_modify_qp(struct ib_uverbs_file *file,
2277                             struct ib_device *ib_dev,
2278                             const char __user *buf, int in_len,
2279                             int out_len)
2280 {
2281         struct ib_uverbs_modify_qp cmd;
2282         struct ib_udata            udata;
2283         struct ib_qp              *qp;
2284         struct ib_qp_attr         *attr;
2285         int                        ret;
2286
2287         if (copy_from_user(&cmd, buf, sizeof cmd))
2288                 return -EFAULT;
2289
2290         INIT_UDATA(&udata, buf + sizeof cmd, NULL, in_len - sizeof cmd,
2291                    out_len);
2292
2293         attr = kmalloc(sizeof *attr, GFP_KERNEL);
2294         if (!attr)
2295                 return -ENOMEM;
2296
2297         qp = idr_read_qp(cmd.qp_handle, file->ucontext);
2298         if (!qp) {
2299                 ret = -EINVAL;
2300                 goto out;
2301         }
2302
2303         attr->qp_state            = cmd.qp_state;
2304         attr->cur_qp_state        = cmd.cur_qp_state;
2305         attr->path_mtu            = cmd.path_mtu;
2306         attr->path_mig_state      = cmd.path_mig_state;
2307         attr->qkey                = cmd.qkey;
2308         attr->rq_psn              = cmd.rq_psn;
2309         attr->sq_psn              = cmd.sq_psn;
2310         attr->dest_qp_num         = cmd.dest_qp_num;
2311         attr->qp_access_flags     = cmd.qp_access_flags;
2312         attr->pkey_index          = cmd.pkey_index;
2313         attr->alt_pkey_index      = cmd.alt_pkey_index;
2314         attr->en_sqd_async_notify = cmd.en_sqd_async_notify;
2315         attr->max_rd_atomic       = cmd.max_rd_atomic;
2316         attr->max_dest_rd_atomic  = cmd.max_dest_rd_atomic;
2317         attr->min_rnr_timer       = cmd.min_rnr_timer;
2318         attr->port_num            = cmd.port_num;
2319         attr->timeout             = cmd.timeout;
2320         attr->retry_cnt           = cmd.retry_cnt;
2321         attr->rnr_retry           = cmd.rnr_retry;
2322         attr->alt_port_num        = cmd.alt_port_num;
2323         attr->alt_timeout         = cmd.alt_timeout;
2324
2325         memcpy(attr->ah_attr.grh.dgid.raw, cmd.dest.dgid, 16);
2326         attr->ah_attr.grh.flow_label        = cmd.dest.flow_label;
2327         attr->ah_attr.grh.sgid_index        = cmd.dest.sgid_index;
2328         attr->ah_attr.grh.hop_limit         = cmd.dest.hop_limit;
2329         attr->ah_attr.grh.traffic_class     = cmd.dest.traffic_class;
2330         attr->ah_attr.dlid                  = cmd.dest.dlid;
2331         attr->ah_attr.sl                    = cmd.dest.sl;
2332         attr->ah_attr.src_path_bits         = cmd.dest.src_path_bits;
2333         attr->ah_attr.static_rate           = cmd.dest.static_rate;
2334         attr->ah_attr.ah_flags              = cmd.dest.is_global ? IB_AH_GRH : 0;
2335         attr->ah_attr.port_num              = cmd.dest.port_num;
2336
2337         memcpy(attr->alt_ah_attr.grh.dgid.raw, cmd.alt_dest.dgid, 16);
2338         attr->alt_ah_attr.grh.flow_label    = cmd.alt_dest.flow_label;
2339         attr->alt_ah_attr.grh.sgid_index    = cmd.alt_dest.sgid_index;
2340         attr->alt_ah_attr.grh.hop_limit     = cmd.alt_dest.hop_limit;
2341         attr->alt_ah_attr.grh.traffic_class = cmd.alt_dest.traffic_class;
2342         attr->alt_ah_attr.dlid              = cmd.alt_dest.dlid;
2343         attr->alt_ah_attr.sl                = cmd.alt_dest.sl;
2344         attr->alt_ah_attr.src_path_bits     = cmd.alt_dest.src_path_bits;
2345         attr->alt_ah_attr.static_rate       = cmd.alt_dest.static_rate;
2346         attr->alt_ah_attr.ah_flags          = cmd.alt_dest.is_global ? IB_AH_GRH : 0;
2347         attr->alt_ah_attr.port_num          = cmd.alt_dest.port_num;
2348
2349         if (qp->real_qp == qp) {
2350                 ret = ib_resolve_eth_dmac(qp, attr, &cmd.attr_mask);
2351                 if (ret)
2352                         goto release_qp;
2353                 ret = qp->device->modify_qp(qp, attr,
2354                         modify_qp_mask(qp->qp_type, cmd.attr_mask), &udata);
2355         } else {
2356                 ret = ib_modify_qp(qp, attr, modify_qp_mask(qp->qp_type, cmd.attr_mask));
2357         }
2358
2359         if (ret)
2360                 goto release_qp;
2361
2362         ret = in_len;
2363
2364 release_qp:
2365         put_qp_read(qp);
2366
2367 out:
2368         kfree(attr);
2369
2370         return ret;
2371 }
2372
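/*
 * DESTROY_QP handler: refuse to destroy a QP that still has multicast
 * attachments, otherwise destroy it, drop the uobject and report how
 * many async events had been delivered for it.
 */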
2373 ssize_t ib_uverbs_destroy_qp(struct ib_uverbs_file *file,
2374                              struct ib_device *ib_dev,
2375                              const char __user *buf, int in_len,
2376                              int out_len)
2377 {
2378         struct ib_uverbs_destroy_qp      cmd;
2379         struct ib_uverbs_destroy_qp_resp resp;
2380         struct ib_uobject               *uobj;
2381         struct ib_qp                    *qp;
2382         struct ib_uqp_object            *obj;
2383         int                              ret = -EINVAL;
2384
2385         if (copy_from_user(&cmd, buf, sizeof cmd))
2386                 return -EFAULT;
2387
2388         memset(&resp, 0, sizeof resp);
2389
2390         uobj = idr_write_uobj(&ib_uverbs_qp_idr, cmd.qp_handle, file->ucontext);
2391         if (!uobj)
2392                 return -EINVAL;
2393         qp  = uobj->object;
2394         obj = container_of(uobj, struct ib_uqp_object, uevent.uobject);
2395
2396         if (!list_empty(&obj->mcast_list)) {
2397                 put_uobj_write(uobj);
2398                 return -EBUSY;
2399         }
2400
2401         ret = ib_destroy_qp(qp);
2402         if (!ret)
2403                 uobj->live = 0;
2404
2405         put_uobj_write(uobj);
2406
2407         if (ret)
2408                 return ret;
2409
2410         if (obj->uxrcd)
2411                 atomic_dec(&obj->uxrcd->refcnt);
2412
2413         idr_remove_uobj(&ib_uverbs_qp_idr, uobj);
2414
2415         mutex_lock(&file->mutex);
2416         list_del(&uobj->list);
2417         mutex_unlock(&file->mutex);
2418
2419         ib_uverbs_release_uevent(file, &obj->uevent);
2420
2421         resp.events_reported = obj->uevent.events_reported;
2422
2423         put_uobj(uobj);
2424
2425         if (copy_to_user((void __user *) (unsigned long) cmd.response,
2426                          &resp, sizeof resp))
2427                 return -EFAULT;
2428
2429         return in_len;
2430 }
2431
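/*
 * Allocate a work request large enough for wr_size plus num_sge
 * scatter/gather entries; the SGE array starts at the next
 * ib_sge-aligned offset after the WR itself (see the sg_list setup in
 * ib_uverbs_post_send() below).
 */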
2432 static void *alloc_wr(size_t wr_size, __u32 num_sge)
2433 {
2434         return kmalloc(ALIGN(wr_size, sizeof (struct ib_sge)) +
2435                          num_sge * sizeof (struct ib_sge), GFP_KERNEL);
2436 }
2437
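/*
 * POST_SEND handler: unmarshal each userspace ib_uverbs_send_wr into
 * the opcode-specific kernel WR (UD, RDMA, atomic or plain send),
 * chain them together, copy in the SGE arrays that follow the WRs in
 * the command buffer, and post the whole chain with a single
 * qp->device->post_send() call.  On failure, resp.bad_wr tells
 * userspace how far down the chain the failing WR was.
 */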
2438 ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file,
2439                             struct ib_device *ib_dev,
2440                             const char __user *buf, int in_len,
2441                             int out_len)
2442 {
2443         struct ib_uverbs_post_send      cmd;
2444         struct ib_uverbs_post_send_resp resp;
2445         struct ib_uverbs_send_wr       *user_wr;
2446         struct ib_send_wr              *wr = NULL, *last, *next, *bad_wr;
2447         struct ib_qp                   *qp;
2448         int                             i, sg_ind;
2449         int                             is_ud;
2450         ssize_t                         ret = -EINVAL;
2451         size_t                          next_size;
2452
2453         if (copy_from_user(&cmd, buf, sizeof cmd))
2454                 return -EFAULT;
2455
2456         if (in_len < sizeof cmd + cmd.wqe_size * cmd.wr_count +
2457             cmd.sge_count * sizeof (struct ib_uverbs_sge))
2458                 return -EINVAL;
2459
2460         if (cmd.wqe_size < sizeof (struct ib_uverbs_send_wr))
2461                 return -EINVAL;
2462
2463         user_wr = kmalloc(cmd.wqe_size, GFP_KERNEL);
2464         if (!user_wr)
2465                 return -ENOMEM;
2466
2467         qp = idr_read_qp(cmd.qp_handle, file->ucontext);
2468         if (!qp)
2469                 goto out;
2470
2471         is_ud = qp->qp_type == IB_QPT_UD;
2472         sg_ind = 0;
2473         last = NULL;
2474         for (i = 0; i < cmd.wr_count; ++i) {
2475                 if (copy_from_user(user_wr,
2476                                    buf + sizeof cmd + i * cmd.wqe_size,
2477                                    cmd.wqe_size)) {
2478                         ret = -EFAULT;
2479                         goto out_put;
2480                 }
2481
2482                 if (user_wr->num_sge + sg_ind > cmd.sge_count) {
2483                         ret = -EINVAL;
2484                         goto out_put;
2485                 }
2486
2487                 if (is_ud) {
2488                         struct ib_ud_wr *ud;
2489
2490                         if (user_wr->opcode != IB_WR_SEND &&
2491                             user_wr->opcode != IB_WR_SEND_WITH_IMM) {
2492                                 ret = -EINVAL;
2493                                 goto out_put;
2494                         }
2495
2496                         next_size = sizeof(*ud);
2497                         ud = alloc_wr(next_size, user_wr->num_sge);
2498                         if (!ud) {
2499                                 ret = -ENOMEM;
2500                                 goto out_put;
2501                         }
2502
2503                         ud->ah = idr_read_ah(user_wr->wr.ud.ah, file->ucontext);
2504                         if (!ud->ah) {
2505                                 kfree(ud);
2506                                 ret = -EINVAL;
2507                                 goto out_put;
2508                         }
2509                         ud->remote_qpn = user_wr->wr.ud.remote_qpn;
2510                         ud->remote_qkey = user_wr->wr.ud.remote_qkey;
2511
2512                         next = &ud->wr;
2513                 } else if (user_wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM ||
2514                            user_wr->opcode == IB_WR_RDMA_WRITE ||
2515                            user_wr->opcode == IB_WR_RDMA_READ) {
2516                         struct ib_rdma_wr *rdma;
2517
2518                         next_size = sizeof(*rdma);
2519                         rdma = alloc_wr(next_size, user_wr->num_sge);
2520                         if (!rdma) {
2521                                 ret = -ENOMEM;
2522                                 goto out_put;
2523                         }
2524
2525                         rdma->remote_addr = user_wr->wr.rdma.remote_addr;
2526                         rdma->rkey = user_wr->wr.rdma.rkey;
2527
2528                         next = &rdma->wr;
2529                 } else if (user_wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
2530                            user_wr->opcode == IB_WR_ATOMIC_FETCH_AND_ADD) {
2531                         struct ib_atomic_wr *atomic;
2532
2533                         next_size = sizeof(*atomic);
2534                         atomic = alloc_wr(next_size, user_wr->num_sge);
2535                         if (!atomic) {
2536                                 ret = -ENOMEM;
2537                                 goto out_put;
2538                         }
2539
2540                         atomic->remote_addr = user_wr->wr.atomic.remote_addr;
2541                         atomic->compare_add = user_wr->wr.atomic.compare_add;
2542                         atomic->swap = user_wr->wr.atomic.swap;
2543                         atomic->rkey = user_wr->wr.atomic.rkey;
2544
2545                         next = &atomic->wr;
2546                 } else if (user_wr->opcode == IB_WR_SEND ||
2547                            user_wr->opcode == IB_WR_SEND_WITH_IMM ||
2548                            user_wr->opcode == IB_WR_SEND_WITH_INV) {
2549                         next_size = sizeof(*next);
2550                         next = alloc_wr(next_size, user_wr->num_sge);
2551                         if (!next) {
2552                                 ret = -ENOMEM;
2553                                 goto out_put;
2554                         }
2555                 } else {
2556                         ret = -EINVAL;
2557                         goto out_put;
2558                 }
2559
2560                 if (user_wr->opcode == IB_WR_SEND_WITH_IMM ||
2561                     user_wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM) {
2562                         next->ex.imm_data =
2563                                         (__be32 __force) user_wr->ex.imm_data;
2564                 } else if (user_wr->opcode == IB_WR_SEND_WITH_INV) {
2565                         next->ex.invalidate_rkey = user_wr->ex.invalidate_rkey;
2566                 }
2567
2568                 if (!last)
2569                         wr = next;
2570                 else
2571                         last->next = next;
2572                 last = next;
2573
2574                 next->next       = NULL;
2575                 next->wr_id      = user_wr->wr_id;
2576                 next->num_sge    = user_wr->num_sge;
2577                 next->opcode     = user_wr->opcode;
2578                 next->send_flags = user_wr->send_flags;
2579
2580                 if (next->num_sge) {
2581                         next->sg_list = (void *) next +
2582                                 ALIGN(next_size, sizeof(struct ib_sge));
2583                         if (copy_from_user(next->sg_list,
2584                                            buf + sizeof cmd +
2585                                            cmd.wr_count * cmd.wqe_size +
2586                                            sg_ind * sizeof (struct ib_sge),
2587                                            next->num_sge * sizeof (struct ib_sge))) {
2588                                 ret = -EFAULT;
2589                                 goto out_put;
2590                         }
2591                         sg_ind += next->num_sge;
2592                 } else
2593                         next->sg_list = NULL;
2594         }
2595
2596         resp.bad_wr = 0;
2597         ret = qp->device->post_send(qp->real_qp, wr, &bad_wr);
2598         if (ret)
2599                 for (next = wr; next; next = next->next) {
2600                         ++resp.bad_wr;
2601                         if (next == bad_wr)
2602                                 break;
2603                 }
2604
2605         if (copy_to_user((void __user *) (unsigned long) cmd.response,
2606                          &resp, sizeof resp))
2607                 ret = -EFAULT;
2608
2609 out_put:
2610         put_qp_read(qp);
2611
2612         while (wr) {
2613                 if (is_ud && ud_wr(wr)->ah)
2614                         put_ah_read(ud_wr(wr)->ah);
2615                 next = wr->next;
2616                 kfree(wr);
2617                 wr = next;
2618         }
2619
2620 out:
2621         kfree(user_wr);
2622
2623         return ret ? ret : in_len;
2624 }
2625
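/*
 * Unmarshal a chain of receive work requests (shared by POST_RECV and
 * POST_SRQ_RECV): each ib_uverbs_recv_wr in the command buffer is
 * turned into an ib_recv_wr followed by its SGE array and linked into
 * a list that the caller posts and then frees.
 */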
2626 static struct ib_recv_wr *ib_uverbs_unmarshall_recv(const char __user *buf,
2627                                                     int in_len,
2628                                                     u32 wr_count,
2629                                                     u32 sge_count,
2630                                                     u32 wqe_size)
2631 {
2632         struct ib_uverbs_recv_wr *user_wr;
2633         struct ib_recv_wr        *wr = NULL, *last, *next;
2634         int                       sg_ind;
2635         int                       i;
2636         int                       ret;
2637
2638         if (in_len < wqe_size * wr_count +
2639             sge_count * sizeof (struct ib_uverbs_sge))
2640                 return ERR_PTR(-EINVAL);
2641
2642         if (wqe_size < sizeof (struct ib_uverbs_recv_wr))
2643                 return ERR_PTR(-EINVAL);
2644
2645         user_wr = kmalloc(wqe_size, GFP_KERNEL);
2646         if (!user_wr)
2647                 return ERR_PTR(-ENOMEM);
2648
2649         sg_ind = 0;
2650         last = NULL;
2651         for (i = 0; i < wr_count; ++i) {
2652                 if (copy_from_user(user_wr, buf + i * wqe_size,
2653                                    wqe_size)) {
2654                         ret = -EFAULT;
2655                         goto err;
2656                 }
2657
2658                 if (user_wr->num_sge + sg_ind > sge_count) {
2659                         ret = -EINVAL;
2660                         goto err;
2661                 }
2662
2663                 next = kmalloc(ALIGN(sizeof *next, sizeof (struct ib_sge)) +
2664                                user_wr->num_sge * sizeof (struct ib_sge),
2665                                GFP_KERNEL);
2666                 if (!next) {
2667                         ret = -ENOMEM;
2668                         goto err;
2669                 }
2670
2671                 if (!last)
2672                         wr = next;
2673                 else
2674                         last->next = next;
2675                 last = next;
2676
2677                 next->next       = NULL;
2678                 next->wr_id      = user_wr->wr_id;
2679                 next->num_sge    = user_wr->num_sge;
2680
2681                 if (next->num_sge) {
2682                         next->sg_list = (void *) next +
2683                                 ALIGN(sizeof *next, sizeof (struct ib_sge));
2684                         if (copy_from_user(next->sg_list,
2685                                            buf + wr_count * wqe_size +
2686                                            sg_ind * sizeof (struct ib_sge),
2687                                            next->num_sge * sizeof (struct ib_sge))) {
2688                                 ret = -EFAULT;
2689                                 goto err;
2690                         }
2691                         sg_ind += next->num_sge;
2692                 } else
2693                         next->sg_list = NULL;
2694         }
2695
2696         kfree(user_wr);
2697         return wr;
2698
2699 err:
2700         kfree(user_wr);
2701
2702         while (wr) {
2703                 next = wr->next;
2704                 kfree(wr);
2705                 wr = next;
2706         }
2707
2708         return ERR_PTR(ret);
2709 }
2710
2711 ssize_t ib_uverbs_post_recv(struct ib_uverbs_file *file,
2712                             struct ib_device *ib_dev,
2713                             const char __user *buf, int in_len,
2714                             int out_len)
2715 {
2716         struct ib_uverbs_post_recv      cmd;
2717         struct ib_uverbs_post_recv_resp resp;
2718         struct ib_recv_wr              *wr, *next, *bad_wr;
2719         struct ib_qp                   *qp;
2720         ssize_t                         ret = -EINVAL;
2721
2722         if (copy_from_user(&cmd, buf, sizeof cmd))
2723                 return -EFAULT;
2724
2725         wr = ib_uverbs_unmarshall_recv(buf + sizeof cmd,
2726                                        in_len - sizeof cmd, cmd.wr_count,
2727                                        cmd.sge_count, cmd.wqe_size);
2728         if (IS_ERR(wr))
2729                 return PTR_ERR(wr);
2730
2731         qp = idr_read_qp(cmd.qp_handle, file->ucontext);
2732         if (!qp)
2733                 goto out;
2734
2735         resp.bad_wr = 0;
2736         ret = qp->device->post_recv(qp->real_qp, wr, &bad_wr);
2737
2738         put_qp_read(qp);
2739
2740         if (ret)
2741                 for (next = wr; next; next = next->next) {
2742                         ++resp.bad_wr;
2743                         if (next == bad_wr)
2744                                 break;
2745                 }
2746
2747         if (copy_to_user((void __user *) (unsigned long) cmd.response,
2748                          &resp, sizeof resp))
2749                 ret = -EFAULT;
2750
2751 out:
2752         while (wr) {
2753                 next = wr->next;
2754                 kfree(wr);
2755                 wr = next;
2756         }
2757
2758         return ret ? ret : in_len;
2759 }
2760
2761 ssize_t ib_uverbs_post_srq_recv(struct ib_uverbs_file *file,
2762                                 struct ib_device *ib_dev,
2763                                 const char __user *buf, int in_len,
2764                                 int out_len)
2765 {
2766         struct ib_uverbs_post_srq_recv      cmd;
2767         struct ib_uverbs_post_srq_recv_resp resp;
2768         struct ib_recv_wr                  *wr, *next, *bad_wr;
2769         struct ib_srq                      *srq;
2770         ssize_t                             ret = -EINVAL;
2771
2772         if (copy_from_user(&cmd, buf, sizeof cmd))
2773                 return -EFAULT;
2774
2775         wr = ib_uverbs_unmarshall_recv(buf + sizeof cmd,
2776                                        in_len - sizeof cmd, cmd.wr_count,
2777                                        cmd.sge_count, cmd.wqe_size);
2778         if (IS_ERR(wr))
2779                 return PTR_ERR(wr);
2780
2781         srq = idr_read_srq(cmd.srq_handle, file->ucontext);
2782         if (!srq)
2783                 goto out;
2784
2785         resp.bad_wr = 0;
2786         ret = srq->device->post_srq_recv(srq, wr, &bad_wr);
2787
2788         put_srq_read(srq);
2789
2790         if (ret)
2791                 for (next = wr; next; next = next->next) {
2792                         ++resp.bad_wr;
2793                         if (next == bad_wr)
2794                                 break;
2795                 }
2796
2797         if (copy_to_user((void __user *) (unsigned long) cmd.response,
2798                          &resp, sizeof resp))
2799                 ret = -EFAULT;
2800
2801 out:
2802         while (wr) {
2803                 next = wr->next;
2804                 kfree(wr);
2805                 wr = next;
2806         }
2807
2808         return ret ? ret : in_len;
2809 }
2810
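/*
 * CREATE_AH handler: build an ib_ah_attr from the command (attr.dmac
 * is simply zeroed here), create the address handle on the given PD
 * and return its handle to userspace.
 */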
2811 ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file,
2812                             struct ib_device *ib_dev,
2813                             const char __user *buf, int in_len,
2814                             int out_len)
2815 {
2816         struct ib_uverbs_create_ah       cmd;
2817         struct ib_uverbs_create_ah_resp  resp;
2818         struct ib_uobject               *uobj;
2819         struct ib_pd                    *pd;
2820         struct ib_ah                    *ah;
2821         struct ib_ah_attr               attr;
2822         int ret;
2823
2824         if (out_len < sizeof resp)
2825                 return -ENOSPC;
2826
2827         if (copy_from_user(&cmd, buf, sizeof cmd))
2828                 return -EFAULT;
2829
2830         uobj = kmalloc(sizeof *uobj, GFP_KERNEL);
2831         if (!uobj)
2832                 return -ENOMEM;
2833
2834         init_uobj(uobj, cmd.user_handle, file->ucontext, &ah_lock_class);
2835         down_write(&uobj->mutex);
2836
2837         pd = idr_read_pd(cmd.pd_handle, file->ucontext);
2838         if (!pd) {
2839                 ret = -EINVAL;
2840                 goto err;
2841         }
2842
2843         attr.dlid              = cmd.attr.dlid;
2844         attr.sl                = cmd.attr.sl;
2845         attr.src_path_bits     = cmd.attr.src_path_bits;
2846         attr.static_rate       = cmd.attr.static_rate;
2847         attr.ah_flags          = cmd.attr.is_global ? IB_AH_GRH : 0;
2848         attr.port_num          = cmd.attr.port_num;
2849         attr.grh.flow_label    = cmd.attr.grh.flow_label;
2850         attr.grh.sgid_index    = cmd.attr.grh.sgid_index;
2851         attr.grh.hop_limit     = cmd.attr.grh.hop_limit;
2852         attr.grh.traffic_class = cmd.attr.grh.traffic_class;
2853         memset(&attr.dmac, 0, sizeof(attr.dmac));
2854         memcpy(attr.grh.dgid.raw, cmd.attr.grh.dgid, 16);
2855
2856         ah = ib_create_ah(pd, &attr);
2857         if (IS_ERR(ah)) {
2858                 ret = PTR_ERR(ah);
2859                 goto err_put;
2860         }
2861
2862         ah->uobject  = uobj;
2863         uobj->object = ah;
2864
2865         ret = idr_add_uobj(&ib_uverbs_ah_idr, uobj);
2866         if (ret)
2867                 goto err_destroy;
2868
2869         resp.ah_handle = uobj->id;
2870
2871         if (copy_to_user((void __user *) (unsigned long) cmd.response,
2872                          &resp, sizeof resp)) {
2873                 ret = -EFAULT;
2874                 goto err_copy;
2875         }
2876
2877         put_pd_read(pd);
2878
2879         mutex_lock(&file->mutex);
2880         list_add_tail(&uobj->list, &file->ucontext->ah_list);
2881         mutex_unlock(&file->mutex);
2882
2883         uobj->live = 1;
2884
2885         up_write(&uobj->mutex);
2886
2887         return in_len;
2888
2889 err_copy:
2890         idr_remove_uobj(&ib_uverbs_ah_idr, uobj);
2891
2892 err_destroy:
2893         ib_destroy_ah(ah);
2894
2895 err_put:
2896         put_pd_read(pd);
2897
2898 err:
2899         put_uobj_write(uobj);
2900         return ret;
2901 }
2902
2903 ssize_t ib_uverbs_destroy_ah(struct ib_uverbs_file *file,
2904                              struct ib_device *ib_dev,
2905                              const char __user *buf, int in_len, int out_len)
2906 {
2907         struct ib_uverbs_destroy_ah cmd;
2908         struct ib_ah               *ah;
2909         struct ib_uobject          *uobj;
2910         int                         ret;
2911
2912         if (copy_from_user(&cmd, buf, sizeof cmd))
2913                 return -EFAULT;
2914
2915         uobj = idr_write_uobj(&ib_uverbs_ah_idr, cmd.ah_handle, file->ucontext);
2916         if (!uobj)
2917                 return -EINVAL;
2918         ah = uobj->object;
2919
2920         ret = ib_destroy_ah(ah);
2921         if (!ret)
2922                 uobj->live = 0;
2923
2924         put_uobj_write(uobj);
2925
2926         if (ret)
2927                 return ret;
2928
2929         idr_remove_uobj(&ib_uverbs_ah_idr, uobj);
2930
2931         mutex_lock(&file->mutex);
2932         list_del(&uobj->list);
2933         mutex_unlock(&file->mutex);
2934
2935         put_uobj(uobj);
2936
2937         return in_len;
2938 }
2939
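/*
 * ATTACH_MCAST handler: attach the QP to the multicast <gid, mlid>
 * pair and track the attachment on the uobject's mcast_list, so that
 * DETACH_MCAST can find it again and DESTROY_QP can refuse to tear
 * down a QP that is still attached.  Re-attaching an already attached
 * group is treated as success.
 */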
2940 ssize_t ib_uverbs_attach_mcast(struct ib_uverbs_file *file,
2941                                struct ib_device *ib_dev,
2942                                const char __user *buf, int in_len,
2943                                int out_len)
2944 {
2945         struct ib_uverbs_attach_mcast cmd;
2946         struct ib_qp                 *qp;
2947         struct ib_uqp_object         *obj;
2948         struct ib_uverbs_mcast_entry *mcast;
2949         int                           ret;
2950
2951         if (copy_from_user(&cmd, buf, sizeof cmd))
2952                 return -EFAULT;
2953
2954         qp = idr_write_qp(cmd.qp_handle, file->ucontext);
2955         if (!qp)
2956                 return -EINVAL;
2957
2958         obj = container_of(qp->uobject, struct ib_uqp_object, uevent.uobject);
2959
2960         list_for_each_entry(mcast, &obj->mcast_list, list)
2961                 if (cmd.mlid == mcast->lid &&
2962                     !memcmp(cmd.gid, mcast->gid.raw, sizeof mcast->gid.raw)) {
2963                         ret = 0;
2964                         goto out_put;
2965                 }
2966
2967         mcast = kmalloc(sizeof *mcast, GFP_KERNEL);
2968         if (!mcast) {
2969                 ret = -ENOMEM;
2970                 goto out_put;
2971         }
2972
2973         mcast->lid = cmd.mlid;
2974         memcpy(mcast->gid.raw, cmd.gid, sizeof mcast->gid.raw);
2975
2976         ret = ib_attach_mcast(qp, &mcast->gid, cmd.mlid);
2977         if (!ret)
2978                 list_add_tail(&mcast->list, &obj->mcast_list);
2979         else
2980                 kfree(mcast);
2981
2982 out_put:
2983         put_qp_write(qp);
2984
2985         return ret ? ret : in_len;
2986 }
2987
2988 ssize_t ib_uverbs_detach_mcast(struct ib_uverbs_file *file,
2989                                struct ib_device *ib_dev,
2990                                const char __user *buf, int in_len,
2991                                int out_len)
2992 {
2993         struct ib_uverbs_detach_mcast cmd;
2994         struct ib_uqp_object         *obj;
2995         struct ib_qp                 *qp;
2996         struct ib_uverbs_mcast_entry *mcast;
2997         int                           ret = -EINVAL;
2998
2999         if (copy_from_user(&cmd, buf, sizeof cmd))
3000                 return -EFAULT;
3001
3002         qp = idr_write_qp(cmd.qp_handle, file->ucontext);
3003         if (!qp)
3004                 return -EINVAL;
3005
3006         ret = ib_detach_mcast(qp, (union ib_gid *) cmd.gid, cmd.mlid);
3007         if (ret)
3008                 goto out_put;
3009
3010         obj = container_of(qp->uobject, struct ib_uqp_object, uevent.uobject);
3011
3012         list_for_each_entry(mcast, &obj->mcast_list, list)
3013                 if (cmd.mlid == mcast->lid &&
3014                     !memcmp(cmd.gid, mcast->gid.raw, sizeof mcast->gid.raw)) {
3015                         list_del(&mcast->list);
3016                         kfree(mcast);
3017                         break;
3018                 }
3019
3020 out_put:
3021         put_qp_write(qp);
3022
3023         return ret ? ret : in_len;
3024 }
3025
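/*
 * Convert one userspace flow spec into the corresponding kernel
 * union ib_flow_spec, checking that the size userspace claimed for
 * the spec matches the kernel's layout for that spec type.
 */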
3026 static int kern_spec_to_ib_spec(struct ib_uverbs_flow_spec *kern_spec,
3027                                 union ib_flow_spec *ib_spec)
3028 {
3029         if (kern_spec->reserved)
3030                 return -EINVAL;
3031
3032         ib_spec->type = kern_spec->type;
3033
3034         switch (ib_spec->type) {
3035         case IB_FLOW_SPEC_ETH:
3036                 ib_spec->eth.size = sizeof(struct ib_flow_spec_eth);
3037                 if (ib_spec->eth.size != kern_spec->eth.size)
3038                         return -EINVAL;
3039                 memcpy(&ib_spec->eth.val, &kern_spec->eth.val,
3040                        sizeof(struct ib_flow_eth_filter));
3041                 memcpy(&ib_spec->eth.mask, &kern_spec->eth.mask,
3042                        sizeof(struct ib_flow_eth_filter));
3043                 break;
3044         case IB_FLOW_SPEC_IPV4:
3045                 ib_spec->ipv4.size = sizeof(struct ib_flow_spec_ipv4);
3046                 if (ib_spec->ipv4.size != kern_spec->ipv4.size)
3047                         return -EINVAL;
3048                 memcpy(&ib_spec->ipv4.val, &kern_spec->ipv4.val,
3049                        sizeof(struct ib_flow_ipv4_filter));
3050                 memcpy(&ib_spec->ipv4.mask, &kern_spec->ipv4.mask,
3051                        sizeof(struct ib_flow_ipv4_filter));
3052                 break;
3053         case IB_FLOW_SPEC_TCP:
3054         case IB_FLOW_SPEC_UDP:
3055                 ib_spec->tcp_udp.size = sizeof(struct ib_flow_spec_tcp_udp);
3056                 if (ib_spec->tcp_udp.size != kern_spec->tcp_udp.size)
3057                         return -EINVAL;
3058                 memcpy(&ib_spec->tcp_udp.val, &kern_spec->tcp_udp.val,
3059                        sizeof(struct ib_flow_tcp_udp_filter));
3060                 memcpy(&ib_spec->tcp_udp.mask, &kern_spec->tcp_udp.mask,
3061                        sizeof(struct ib_flow_tcp_udp_filter));
3062                 break;
3063         default:
3064                 return -EINVAL;
3065         }
3066         return 0;
3067 }
3068
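/*
 * Extended command: create a flow steering rule and attach it to a QP.
 * The variable-length array of flow specs that follows the fixed
 * ib_uverbs_create_flow header is copied in, validated spec by spec with
 * kern_spec_to_ib_spec(), and repacked into an ib_flow_attr before
 * ib_create_flow() is called.  The new rule is tracked as a uobject in
 * ib_uverbs_rule_idr and its handle is returned to user space.
 */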
3069 int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file,
3070                              struct ib_device *ib_dev,
3071                              struct ib_udata *ucore,
3072                              struct ib_udata *uhw)
3073 {
3074         struct ib_uverbs_create_flow      cmd;
3075         struct ib_uverbs_create_flow_resp resp;
3076         struct ib_uobject                 *uobj;
3077         struct ib_flow                    *flow_id;
3078         struct ib_uverbs_flow_attr        *kern_flow_attr;
3079         struct ib_flow_attr               *flow_attr;
3080         struct ib_qp                      *qp;
3081         int err = 0;
3082         void *kern_spec;
3083         void *ib_spec;
3084         int i;
3085
3086         if (ucore->inlen < sizeof(cmd))
3087                 return -EINVAL;
3088
3089         if (ucore->outlen < sizeof(resp))
3090                 return -ENOSPC;
3091
3092         err = ib_copy_from_udata(&cmd, ucore, sizeof(cmd));
3093         if (err)
3094                 return err;
3095
3096         ucore->inbuf += sizeof(cmd);
3097         ucore->inlen -= sizeof(cmd);
3098
3099         if (cmd.comp_mask)
3100                 return -EINVAL;
3101
3102         if ((cmd.flow_attr.type == IB_FLOW_ATTR_SNIFFER &&
3103              !capable(CAP_NET_ADMIN)) || !capable(CAP_NET_RAW))
3104                 return -EPERM;
3105
3106         if (cmd.flow_attr.num_of_specs > IB_FLOW_SPEC_SUPPORT_LAYERS)
3107                 return -EINVAL;
3108
3109         if (cmd.flow_attr.size > ucore->inlen ||
3110             cmd.flow_attr.size >
3111             (cmd.flow_attr.num_of_specs * sizeof(struct ib_uverbs_flow_spec)))
3112                 return -EINVAL;
3113
3114         if (cmd.flow_attr.reserved[0] ||
3115             cmd.flow_attr.reserved[1])
3116                 return -EINVAL;
3117
3118         if (cmd.flow_attr.num_of_specs) {
3119                 kern_flow_attr = kmalloc(sizeof(*kern_flow_attr) + cmd.flow_attr.size,
3120                                          GFP_KERNEL);
3121                 if (!kern_flow_attr)
3122                         return -ENOMEM;
3123
3124                 memcpy(kern_flow_attr, &cmd.flow_attr, sizeof(*kern_flow_attr));
3125                 err = ib_copy_from_udata(kern_flow_attr + 1, ucore,
3126                                          cmd.flow_attr.size);
3127                 if (err)
3128                         goto err_free_attr;
3129         } else {
3130                 kern_flow_attr = &cmd.flow_attr;
3131         }
3132
3133         uobj = kmalloc(sizeof(*uobj), GFP_KERNEL);
3134         if (!uobj) {
3135                 err = -ENOMEM;
3136                 goto err_free_attr;
3137         }
3138         init_uobj(uobj, 0, file->ucontext, &rule_lock_class);
3139         down_write(&uobj->mutex);
3140
3141         qp = idr_read_qp(cmd.qp_handle, file->ucontext);
3142         if (!qp) {
3143                 err = -EINVAL;
3144                 goto err_uobj;
3145         }
3146
3147         flow_attr = kmalloc(sizeof(*flow_attr) + cmd.flow_attr.size, GFP_KERNEL);
3148         if (!flow_attr) {
3149                 err = -ENOMEM;
3150                 goto err_put;
3151         }
3152
3153         flow_attr->type = kern_flow_attr->type;
3154         flow_attr->priority = kern_flow_attr->priority;
3155         flow_attr->num_of_specs = kern_flow_attr->num_of_specs;
3156         flow_attr->port = kern_flow_attr->port;
3157         flow_attr->flags = kern_flow_attr->flags;
3158         flow_attr->size = sizeof(*flow_attr);
3159
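        /*
         * Walk the user-space specs that trail the flow_attr header and
         * convert each one into the ib_spec area directly after the
         * kernel flow_attr.  The remaining byte count in
         * cmd.flow_attr.size must reach exactly zero once num_of_specs
         * entries have been consumed.
         */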
3160         kern_spec = kern_flow_attr + 1;
3161         ib_spec = flow_attr + 1;
3162         for (i = 0; i < flow_attr->num_of_specs &&
3163              cmd.flow_attr.size > offsetof(struct ib_uverbs_flow_spec, reserved) &&
3164              cmd.flow_attr.size >=
3165              ((struct ib_uverbs_flow_spec *)kern_spec)->size; i++) {
3166                 err = kern_spec_to_ib_spec(kern_spec, ib_spec);
3167                 if (err)
3168                         goto err_free;
3169                 flow_attr->size +=
3170                         ((union ib_flow_spec *) ib_spec)->size;
3171                 cmd.flow_attr.size -= ((struct ib_uverbs_flow_spec *)kern_spec)->size;
3172                 kern_spec += ((struct ib_uverbs_flow_spec *) kern_spec)->size;
3173                 ib_spec += ((union ib_flow_spec *) ib_spec)->size;
3174         }
3175         if (cmd.flow_attr.size || (i != flow_attr->num_of_specs)) {
3176                 pr_warn("create flow failed, flow %d: %d bytes left from uverbs cmd\n",
3177                         i, cmd.flow_attr.size);
3178                 err = -EINVAL;
3179                 goto err_free;
3180         }
3181         flow_id = ib_create_flow(qp, flow_attr, IB_FLOW_DOMAIN_USER);
3182         if (IS_ERR(flow_id)) {
3183                 err = PTR_ERR(flow_id);
3184                 goto err_free;
3185         }
3186         flow_id->qp = qp;
3187         flow_id->uobject = uobj;
3188         uobj->object = flow_id;
3189
3190         err = idr_add_uobj(&ib_uverbs_rule_idr, uobj);
3191         if (err)
3192                 goto destroy_flow;
3193
3194         memset(&resp, 0, sizeof(resp));
3195         resp.flow_handle = uobj->id;
3196
3197         err = ib_copy_to_udata(ucore,
3198                                &resp, sizeof(resp));
3199         if (err)
3200                 goto err_copy;
3201
3202         put_qp_read(qp);
3203         mutex_lock(&file->mutex);
3204         list_add_tail(&uobj->list, &file->ucontext->rule_list);
3205         mutex_unlock(&file->mutex);
3206
3207         uobj->live = 1;
3208
3209         up_write(&uobj->mutex);
3210         kfree(flow_attr);
3211         if (cmd.flow_attr.num_of_specs)
3212                 kfree(kern_flow_attr);
3213         return 0;
3214 err_copy:
3215         idr_remove_uobj(&ib_uverbs_rule_idr, uobj);
3216 destroy_flow:
3217         ib_destroy_flow(flow_id);
3218 err_free:
3219         kfree(flow_attr);
3220 err_put:
3221         put_qp_read(qp);
3222 err_uobj:
3223         put_uobj_write(uobj);
3224 err_free_attr:
3225         if (cmd.flow_attr.num_of_specs)
3226                 kfree(kern_flow_attr);
3227         return err;
3228 }
3229
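/*
 * Extended command: destroy a flow steering rule.  The rule uobject is
 * taken with a write lock and the underlying ib_flow destroyed; the
 * handle is then removed from ib_uverbs_rule_idr and from the per-context
 * rule_list, and the final reference is dropped.
 */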
3230 int ib_uverbs_ex_destroy_flow(struct ib_uverbs_file *file,
3231                               struct ib_device *ib_dev,
3232                               struct ib_udata *ucore,
3233                               struct ib_udata *uhw)
3234 {
3235         struct ib_uverbs_destroy_flow   cmd;
3236         struct ib_flow                  *flow_id;
3237         struct ib_uobject               *uobj;
3238         int                             ret;
3239
3240         if (ucore->inlen < sizeof(cmd))
3241                 return -EINVAL;
3242
3243         ret = ib_copy_from_udata(&cmd, ucore, sizeof(cmd));
3244         if (ret)
3245                 return ret;
3246
3247         if (cmd.comp_mask)
3248                 return -EINVAL;
3249
3250         uobj = idr_write_uobj(&ib_uverbs_rule_idr, cmd.flow_handle,
3251                               file->ucontext);
3252         if (!uobj)
3253                 return -EINVAL;
3254         flow_id = uobj->object;
3255
3256         ret = ib_destroy_flow(flow_id);
3257         if (!ret)
3258                 uobj->live = 0;
3259
3260         put_uobj_write(uobj);
3261
3262         idr_remove_uobj(&ib_uverbs_rule_idr, uobj);
3263
3264         mutex_lock(&file->mutex);
3265         list_del(&uobj->list);
3266         mutex_unlock(&file->mutex);
3267
3268         put_uobj(uobj);
3269
3270         return ret;
3271 }
3272
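/*
 * Common worker for the create_srq and create_xsrq commands.  It resolves
 * the PD (and, for IB_SRQT_XRC, the XRC domain and completion queue),
 * asks the driver to create the SRQ, wires up the usage counters, and
 * publishes the new handle through ib_uverbs_srq_idr before answering
 * user space.
 */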
3273 static int __uverbs_create_xsrq(struct ib_uverbs_file *file,
3274                                 struct ib_device *ib_dev,
3275                                 struct ib_uverbs_create_xsrq *cmd,
3276                                 struct ib_udata *udata)
3277 {
3278         struct ib_uverbs_create_srq_resp resp;
3279         struct ib_usrq_object           *obj;
3280         struct ib_pd                    *pd;
3281         struct ib_srq                   *srq;
3282         struct ib_uobject               *uninitialized_var(xrcd_uobj);
3283         struct ib_srq_init_attr          attr;
3284         int ret;
3285
3286         obj = kmalloc(sizeof *obj, GFP_KERNEL);
3287         if (!obj)
3288                 return -ENOMEM;
3289
3290         init_uobj(&obj->uevent.uobject, cmd->user_handle, file->ucontext, &srq_lock_class);
3291         down_write(&obj->uevent.uobject.mutex);
3292
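        /*
         * XRC SRQs need an XRC domain and a completion queue in addition
         * to the PD; look both up here, and drop them again on the error
         * paths below if anything later fails.
         */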
3293         if (cmd->srq_type == IB_SRQT_XRC) {
3294                 attr.ext.xrc.xrcd  = idr_read_xrcd(cmd->xrcd_handle, file->ucontext, &xrcd_uobj);
3295                 if (!attr.ext.xrc.xrcd) {
3296                         ret = -EINVAL;
3297                         goto err;
3298                 }
3299
3300                 obj->uxrcd = container_of(xrcd_uobj, struct ib_uxrcd_object, uobject);
3301                 atomic_inc(&obj->uxrcd->refcnt);
3302
3303                 attr.ext.xrc.cq  = idr_read_cq(cmd->cq_handle, file->ucontext, 0);
3304                 if (!attr.ext.xrc.cq) {
3305                         ret = -EINVAL;
3306                         goto err_put_xrcd;
3307                 }
3308         }
3309
3310         pd  = idr_read_pd(cmd->pd_handle, file->ucontext);
3311         if (!pd) {
3312                 ret = -EINVAL;
3313                 goto err_put_cq;
3314         }
3315
3316         attr.event_handler  = ib_uverbs_srq_event_handler;
3317         attr.srq_context    = file;
3318         attr.srq_type       = cmd->srq_type;
3319         attr.attr.max_wr    = cmd->max_wr;
3320         attr.attr.max_sge   = cmd->max_sge;
3321         attr.attr.srq_limit = cmd->srq_limit;
3322
3323         obj->uevent.events_reported = 0;
3324         INIT_LIST_HEAD(&obj->uevent.event_list);
3325
3326         srq = pd->device->create_srq(pd, &attr, udata);
3327         if (IS_ERR(srq)) {
3328                 ret = PTR_ERR(srq);
3329                 goto err_put;
3330         }
3331
3332         srq->device        = pd->device;
3333         srq->pd            = pd;
3334         srq->srq_type      = cmd->srq_type;
3335         srq->uobject       = &obj->uevent.uobject;
3336         srq->event_handler = attr.event_handler;
3337         srq->srq_context   = attr.srq_context;
3338
3339         if (cmd->srq_type == IB_SRQT_XRC) {
3340                 srq->ext.xrc.cq   = attr.ext.xrc.cq;
3341                 srq->ext.xrc.xrcd = attr.ext.xrc.xrcd;
3342                 atomic_inc(&attr.ext.xrc.cq->usecnt);
3343                 atomic_inc(&attr.ext.xrc.xrcd->usecnt);
3344         }
3345
3346         atomic_inc(&pd->usecnt);
3347         atomic_set(&srq->usecnt, 0);
3348
3349         obj->uevent.uobject.object = srq;
3350         ret = idr_add_uobj(&ib_uverbs_srq_idr, &obj->uevent.uobject);
3351         if (ret)
3352                 goto err_destroy;
3353
3354         memset(&resp, 0, sizeof resp);
3355         resp.srq_handle = obj->uevent.uobject.id;
3356         resp.max_wr     = attr.attr.max_wr;
3357         resp.max_sge    = attr.attr.max_sge;
3358         if (cmd->srq_type == IB_SRQT_XRC)
3359                 resp.srqn = srq->ext.xrc.srq_num;
3360
3361         if (copy_to_user((void __user *) (unsigned long) cmd->response,
3362                          &resp, sizeof resp)) {
3363                 ret = -EFAULT;
3364                 goto err_copy;
3365         }
3366
3367         if (cmd->srq_type == IB_SRQT_XRC) {
3368                 put_uobj_read(xrcd_uobj);
3369                 put_cq_read(attr.ext.xrc.cq);
3370         }
3371         put_pd_read(pd);
3372
3373         mutex_lock(&file->mutex);
3374         list_add_tail(&obj->uevent.uobject.list, &file->ucontext->srq_list);
3375         mutex_unlock(&file->mutex);
3376
3377         obj->uevent.uobject.live = 1;
3378
3379         up_write(&obj->uevent.uobject.mutex);
3380
3381         return 0;
3382
3383 err_copy:
3384         idr_remove_uobj(&ib_uverbs_srq_idr, &obj->uevent.uobject);
3385
3386 err_destroy:
3387         ib_destroy_srq(srq);
3388
3389 err_put:
3390         put_pd_read(pd);
3391
3392 err_put_cq:
3393         if (cmd->srq_type == IB_SRQT_XRC)
3394                 put_cq_read(attr.ext.xrc.cq);
3395
3396 err_put_xrcd:
3397         if (cmd->srq_type == IB_SRQT_XRC) {
3398                 atomic_dec(&obj->uxrcd->refcnt);
3399                 put_uobj_read(xrcd_uobj);
3400         }
3401
3402 err:
3403         put_uobj_write(&obj->uevent.uobject);
3404         return ret;
3405 }
3406
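/*
 * Legacy create_srq command.  The request is widened into an
 * ib_uverbs_create_xsrq with srq_type forced to IB_SRQT_BASIC and handed
 * to the common __uverbs_create_xsrq() helper.
 */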
3407 ssize_t ib_uverbs_create_srq(struct ib_uverbs_file *file,
3408                              struct ib_device *ib_dev,
3409                              const char __user *buf, int in_len,
3410                              int out_len)
3411 {
3412         struct ib_uverbs_create_srq      cmd;
3413         struct ib_uverbs_create_xsrq     xcmd;
3414         struct ib_uverbs_create_srq_resp resp;
3415         struct ib_udata                  udata;
3416         int ret;
3417
3418         if (out_len < sizeof resp)
3419                 return -ENOSPC;
3420
3421         if (copy_from_user(&cmd, buf, sizeof cmd))
3422                 return -EFAULT;
3423
3424         xcmd.response    = cmd.response;
3425         xcmd.user_handle = cmd.user_handle;
3426         xcmd.srq_type    = IB_SRQT_BASIC;
3427         xcmd.pd_handle   = cmd.pd_handle;
3428         xcmd.max_wr      = cmd.max_wr;
3429         xcmd.max_sge     = cmd.max_sge;
3430         xcmd.srq_limit   = cmd.srq_limit;
3431
3432         INIT_UDATA(&udata, buf + sizeof cmd,
3433                    (unsigned long) cmd.response + sizeof resp,
3434                    in_len - sizeof cmd, out_len - sizeof resp);
3435
3436         ret = __uverbs_create_xsrq(file, ib_dev, &xcmd, &udata);
3437         if (ret)
3438                 return ret;
3439
3440         return in_len;
3441 }
3442
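/*
 * Extended create_xsrq command: same flow as create_srq, but the caller
 * supplies the SRQ type (e.g. IB_SRQT_XRC) and the extra XRCD/CQ handles
 * directly in the command structure.
 */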
3443 ssize_t ib_uverbs_create_xsrq(struct ib_uverbs_file *file,
3444                               struct ib_device *ib_dev,
3445                               const char __user *buf, int in_len, int out_len)
3446 {
3447         struct ib_uverbs_create_xsrq     cmd;
3448         struct ib_uverbs_create_srq_resp resp;
3449         struct ib_udata                  udata;
3450         int ret;
3451
3452         if (out_len < sizeof resp)
3453                 return -ENOSPC;
3454
3455         if (copy_from_user(&cmd, buf, sizeof cmd))
3456                 return -EFAULT;
3457
3458         INIT_UDATA(&udata, buf + sizeof cmd,
3459                    (unsigned long) cmd.response + sizeof resp,
3460                    in_len - sizeof cmd, out_len - sizeof resp);
3461
3462         ret = __uverbs_create_xsrq(file, ib_dev, &cmd, &udata);
3463         if (ret)
3464                 return ret;
3465
3466         return in_len;
3467 }
3468
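/*
 * Modify an existing SRQ.  Only max_wr and srq_limit are passed through;
 * which of them the driver should apply is selected by cmd.attr_mask.
 */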
3469 ssize_t ib_uverbs_modify_srq(struct ib_uverbs_file *file,
3470                              struct ib_device *ib_dev,
3471                              const char __user *buf, int in_len,
3472                              int out_len)
3473 {
3474         struct ib_uverbs_modify_srq cmd;
3475         struct ib_udata             udata;
3476         struct ib_srq              *srq;
3477         struct ib_srq_attr          attr;
3478         int                         ret;
3479
3480         if (copy_from_user(&cmd, buf, sizeof cmd))
3481                 return -EFAULT;
3482
3483         INIT_UDATA(&udata, buf + sizeof cmd, NULL, in_len - sizeof cmd,
3484                    out_len);
3485
3486         srq = idr_read_srq(cmd.srq_handle, file->ucontext);
3487         if (!srq)
3488                 return -EINVAL;
3489
3490         attr.max_wr    = cmd.max_wr;
3491         attr.srq_limit = cmd.srq_limit;
3492
3493         ret = srq->device->modify_srq(srq, &attr, cmd.attr_mask, &udata);
3494
3495         put_srq_read(srq);
3496
3497         return ret ? ret : in_len;
3498 }
3499
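/*
 * Query an SRQ and return its current max_wr, max_sge and srq_limit
 * values to user space.
 */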
3500 ssize_t ib_uverbs_query_srq(struct ib_uverbs_file *file,
3501                             struct ib_device *ib_dev,
3502                             const char __user *buf,
3503                             int in_len, int out_len)
3504 {
3505         struct ib_uverbs_query_srq      cmd;
3506         struct ib_uverbs_query_srq_resp resp;
3507         struct ib_srq_attr              attr;
3508         struct ib_srq                   *srq;
3509         int                             ret;
3510
3511         if (out_len < sizeof resp)
3512                 return -ENOSPC;
3513
3514         if (copy_from_user(&cmd, buf, sizeof cmd))
3515                 return -EFAULT;
3516
3517         srq = idr_read_srq(cmd.srq_handle, file->ucontext);
3518         if (!srq)
3519                 return -EINVAL;
3520
3521         ret = ib_query_srq(srq, &attr);
3522
3523         put_srq_read(srq);
3524
3525         if (ret)
3526                 return ret;
3527
3528         memset(&resp, 0, sizeof resp);
3529
3530         resp.max_wr    = attr.max_wr;
3531         resp.max_sge   = attr.max_sge;
3532         resp.srq_limit = attr.srq_limit;
3533
3534         if (copy_to_user((void __user *) (unsigned long) cmd.response,
3535                          &resp, sizeof resp))
3536                 return -EFAULT;
3537
3538         return in_len;
3539 }
3540
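/*
 * Destroy an SRQ.  The srq_type is sampled before ib_destroy_srq() frees
 * the object so that, for XRC SRQs, the XRCD reference taken at creation
 * time can still be dropped.  The number of asynchronous events already
 * reported for this SRQ is returned in the response.
 */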
3541 ssize_t ib_uverbs_destroy_srq(struct ib_uverbs_file *file,
3542                               struct ib_device *ib_dev,
3543                               const char __user *buf, int in_len,
3544                               int out_len)
3545 {
3546         struct ib_uverbs_destroy_srq      cmd;
3547         struct ib_uverbs_destroy_srq_resp resp;
3548         struct ib_uobject                *uobj;
3549         struct ib_srq                    *srq;
3550         struct ib_uevent_object          *obj;
3551         int                               ret = -EINVAL;
3552         struct ib_usrq_object            *us;
3553         enum ib_srq_type                  srq_type;
3554
3555         if (copy_from_user(&cmd, buf, sizeof cmd))
3556                 return -EFAULT;
3557
3558         uobj = idr_write_uobj(&ib_uverbs_srq_idr, cmd.srq_handle, file->ucontext);
3559         if (!uobj)
3560                 return -EINVAL;
3561         srq = uobj->object;
3562         obj = container_of(uobj, struct ib_uevent_object, uobject);
3563         srq_type = srq->srq_type;
3564
3565         ret = ib_destroy_srq(srq);
3566         if (!ret)
3567                 uobj->live = 0;
3568
3569         put_uobj_write(uobj);
3570
3571         if (ret)
3572                 return ret;
3573
3574         if (srq_type == IB_SRQT_XRC) {
3575                 us = container_of(obj, struct ib_usrq_object, uevent);
3576                 atomic_dec(&us->uxrcd->refcnt);
3577         }
3578
3579         idr_remove_uobj(&ib_uverbs_srq_idr, uobj);
3580
3581         mutex_lock(&file->mutex);
3582         list_del(&uobj->list);
3583         mutex_unlock(&file->mutex);
3584
3585         ib_uverbs_release_uevent(file, obj);
3586
3587         memset(&resp, 0, sizeof resp);
3588         resp.events_reported = obj->events_reported;
3589
3590         put_uobj(uobj);
3591
3592         if (copy_to_user((void __user *) (unsigned long) cmd.response,
3593                          &resp, sizeof resp))
3594                 ret = -EFAULT;
3595
3596         return ret ? ret : in_len;
3597 }
3598
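/*
 * Extended query_device command.  The response is extensible: each
 * optional block (ODP capabilities, timestamp mask, HCA core clock) is
 * appended, and response_length grown, only if the buffer provided by
 * user space is large enough to hold it.
 */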
3599 int ib_uverbs_ex_query_device(struct ib_uverbs_file *file,
3600                               struct ib_device *ib_dev,
3601                               struct ib_udata *ucore,
3602                               struct ib_udata *uhw)
3603 {
3604         struct ib_uverbs_ex_query_device_resp resp;
3605         struct ib_uverbs_ex_query_device  cmd;
3606         struct ib_device_attr attr;
3607         int err;
3608
3609         if (ucore->inlen < sizeof(cmd))
3610                 return -EINVAL;
3611
3612         err = ib_copy_from_udata(&cmd, ucore, sizeof(cmd));
3613         if (err)
3614                 return err;
3615
3616         if (cmd.comp_mask)
3617                 return -EINVAL;
3618
3619         if (cmd.reserved)
3620                 return -EINVAL;
3621
3622         resp.response_length = offsetof(typeof(resp), odp_caps);
3623
3624         if (ucore->outlen < resp.response_length)
3625                 return -ENOSPC;
3626
3627         memset(&attr, 0, sizeof(attr));
3628
3629         err = ib_dev->query_device(ib_dev, &attr, uhw);
3630         if (err)
3631                 return err;
3632
3633         copy_query_dev_fields(file, ib_dev, &resp.base, &attr);
3634         resp.comp_mask = 0;
3635
3636         if (ucore->outlen < resp.response_length + sizeof(resp.odp_caps))
3637                 goto end;
3638
3639 #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
3640         resp.odp_caps.general_caps = attr.odp_caps.general_caps;
3641         resp.odp_caps.per_transport_caps.rc_odp_caps =
3642                 attr.odp_caps.per_transport_caps.rc_odp_caps;
3643         resp.odp_caps.per_transport_caps.uc_odp_caps =
3644                 attr.odp_caps.per_transport_caps.uc_odp_caps;
3645         resp.odp_caps.per_transport_caps.ud_odp_caps =
3646                 attr.odp_caps.per_transport_caps.ud_odp_caps;
3647         resp.odp_caps.reserved = 0;
3648 #else
3649         memset(&resp.odp_caps, 0, sizeof(resp.odp_caps));
3650 #endif
3651         resp.response_length += sizeof(resp.odp_caps);
3652
3653         if (ucore->outlen < resp.response_length + sizeof(resp.timestamp_mask))
3654                 goto end;
3655
3656         resp.timestamp_mask = attr.timestamp_mask;
3657         resp.response_length += sizeof(resp.timestamp_mask);
3658
3659         if (ucore->outlen < resp.response_length + sizeof(resp.hca_core_clock))
3660                 goto end;
3661
3662         resp.hca_core_clock = attr.hca_core_clock;
3663         resp.response_length += sizeof(resp.hca_core_clock);
3664
3665 end:
3666         err = ib_copy_to_udata(ucore, &resp, resp.response_length);
3667         if (err)
3668                 return err;
3669
3670         return 0;
3671 }