These changes are the raw update to linux-4.4.6-rt14. Kernel sources

[kvmfornfv.git] / kernel / drivers / staging / lustre / lustre / osc / osc_request.c
diff --git a/kernel/drivers/staging/lustre/lustre/osc/osc_request.c b/kernel/drivers/staging/lustre/lustre/osc/osc_request.c

index d7a9b65..367f83a 100644 (file)
--- a/kernel/drivers/staging/lustre/lustre/osc/osc_request.c
+++ b/kernel/drivers/staging/lustre/lustre/osc/osc_request.c
@@ -38,7 +38,6 @@
  
  #include "../../include/linux/libcfs/libcfs.h"
  
-
  #include "../include/lustre_dlm.h"
  #include "../include/lustre_net.h"
  #include "../include/lustre/lustre_user.h"
@@ -50,9 +49,18 @@
  #include "../include/lustre_param.h"
  #include "../include/lustre_fid.h"
  #include "../include/obd_class.h"
+#include "../include/obd.h"
  #include "osc_internal.h"
  #include "osc_cl_internal.h"
  
+atomic_t osc_pool_req_count;
+unsigned int osc_reqpool_maxreqcount;
+struct ptlrpc_request_pool *osc_rq_pool;
+
+/* max memory used for request pool, unit is MB */
+static unsigned int osc_reqpool_mem_max = 5;
+module_param(osc_reqpool_mem_max, uint, 0444);
+
  struct osc_brw_async_args {
         struct obdo       *aa_oa;
         int             aa_requested_nob;
@@ -63,7 +71,6 @@ struct osc_brw_async_args {
         struct client_obd *aa_cli;
         struct list_head         aa_oaps;
         struct list_head         aa_exts;
-       struct obd_capa   *aa_ocapa;
         struct cl_req     *aa_clerq;
  };
  
@@ -110,7 +117,7 @@ static int osc_packmd(struct obd_export *exp, struct lov_mds_md **lmmp,
                 return lmm_size;
  
         if (*lmmp != NULL && lsm == NULL) {
-               OBD_FREE(*lmmp, lmm_size);
+               kfree(*lmmp);
                 *lmmp = NULL;
                 return 0;
         } else if (unlikely(lsm != NULL && ostid_id(&lsm->lsm_oi) == 0)) {
@@ -118,8 +125,8 @@ static int osc_packmd(struct obd_export *exp, struct lov_mds_md **lmmp,
         }
  
         if (*lmmp == NULL) {
-               OBD_ALLOC(*lmmp, lmm_size);
-               if (*lmmp == NULL)
+               *lmmp = kzalloc(lmm_size, GFP_NOFS);
+               if (!*lmmp)
                         return -ENOMEM;
         }
  
@@ -157,19 +164,20 @@ static int osc_unpackmd(struct obd_export *exp, struct lov_stripe_md **lsmp,
                 return lsm_size;
  
         if (*lsmp != NULL && lmm == NULL) {
-               OBD_FREE((*lsmp)->lsm_oinfo[0], sizeof(struct lov_oinfo));
-               OBD_FREE(*lsmp, lsm_size);
+               kfree((*lsmp)->lsm_oinfo[0]);
+               kfree(*lsmp);
                 *lsmp = NULL;
                 return 0;
         }
  
         if (*lsmp == NULL) {
-               OBD_ALLOC(*lsmp, lsm_size);
+               *lsmp = kzalloc(lsm_size, GFP_NOFS);
                 if (unlikely(*lsmp == NULL))
                         return -ENOMEM;
-               OBD_ALLOC((*lsmp)->lsm_oinfo[0], sizeof(struct lov_oinfo));
+               (*lsmp)->lsm_oinfo[0] = kzalloc(sizeof(struct lov_oinfo),
+                                               GFP_NOFS);
                 if (unlikely((*lsmp)->lsm_oinfo[0] == NULL)) {
-                       OBD_FREE(*lsmp, lsm_size);
+                       kfree(*lsmp);
                         return -ENOMEM;
                 }
                 loi_init((*lsmp)->lsm_oinfo[0]);
@@ -190,22 +198,6 @@ static int osc_unpackmd(struct obd_export *exp, struct lov_stripe_md **lsmp,
         return lsm_size;
  }
  
-static inline void osc_pack_capa(struct ptlrpc_request *req,
-                                struct ost_body *body, void *capa)
-{
-       struct obd_capa *oc = (struct obd_capa *)capa;
-       struct lustre_capa *c;
-
-       if (!capa)
-               return;
-
-       c = req_capsule_client_get(&req->rq_pill, &RMF_CAPA1);
-       LASSERT(c);
-       capa_cpy(c, oc);
-       body->oa.o_valid |= OBD_MD_FLOSSCAPA;
-       DEBUG_CAPA(D_SEC, c, "pack");
-}
-
  static inline void osc_pack_req_body(struct ptlrpc_request *req,
                                      struct obd_info *oinfo)
  {
@@ -216,18 +208,6 @@ static inline void osc_pack_req_body(struct ptlrpc_request *req,
  
         lustre_set_wire_obdo(&req->rq_import->imp_connect_data, &body->oa,
                              oinfo->oi_oa);
-       osc_pack_capa(req, body, oinfo->oi_capa);
-}
-
-static inline void osc_set_capa_size(struct ptlrpc_request *req,
-                                    const struct req_msg_field *field,
-                                    struct obd_capa *oc)
-{
-       if (oc == NULL)
-               req_capsule_set_size(&req->rq_pill, field, RCL_CLIENT, 0);
-       else
-               /* it is already calculated as sizeof struct obd_capa */
-               ;
  }
  
  static int osc_getattr_interpret(const struct lu_env *env,
@@ -263,13 +243,12 @@ static int osc_getattr_async(struct obd_export *exp, struct obd_info *oinfo,
  {
         struct ptlrpc_request *req;
         struct osc_async_args *aa;
-       int                 rc;
+       int rc;
  
         req = ptlrpc_request_alloc(class_exp2cliimp(exp), &RQF_OST_GETATTR);
         if (req == NULL)
                 return -ENOMEM;
  
-       osc_set_capa_size(req, &RMF_CAPA1, oinfo->oi_capa);
         rc = ptlrpc_request_pack(req, LUSTRE_OST_VERSION, OST_GETATTR);
         if (rc) {
                 ptlrpc_request_free(req);
@@ -293,14 +272,13 @@ static int osc_getattr(const struct lu_env *env, struct obd_export *exp,
                        struct obd_info *oinfo)
  {
         struct ptlrpc_request *req;
-       struct ost_body       *body;
-       int                 rc;
+       struct ost_body *body;
+       int rc;
  
         req = ptlrpc_request_alloc(class_exp2cliimp(exp), &RQF_OST_GETATTR);
         if (req == NULL)
                 return -ENOMEM;
  
-       osc_set_capa_size(req, &RMF_CAPA1, oinfo->oi_capa);
         rc = ptlrpc_request_pack(req, LUSTRE_OST_VERSION, OST_GETATTR);
         if (rc) {
                 ptlrpc_request_free(req);
@@ -337,8 +315,8 @@ static int osc_setattr(const struct lu_env *env, struct obd_export *exp,
                        struct obd_info *oinfo, struct obd_trans_info *oti)
  {
         struct ptlrpc_request *req;
-       struct ost_body       *body;
-       int                 rc;
+       struct ost_body *body;
+       int rc;
  
         LASSERT(oinfo->oi_oa->o_valid & OBD_MD_FLGROUP);
  
@@ -346,7 +324,6 @@ static int osc_setattr(const struct lu_env *env, struct obd_export *exp,
         if (req == NULL)
                 return -ENOMEM;
  
-       osc_set_capa_size(req, &RMF_CAPA1, oinfo->oi_capa);
         rc = ptlrpc_request_pack(req, LUSTRE_OST_VERSION, OST_SETATTR);
         if (rc) {
                 ptlrpc_request_free(req);
@@ -402,15 +379,14 @@ int osc_setattr_async_base(struct obd_export *exp, struct obd_info *oinfo,
                            obd_enqueue_update_f upcall, void *cookie,
                            struct ptlrpc_request_set *rqset)
  {
-       struct ptlrpc_request   *req;
+       struct ptlrpc_request *req;
         struct osc_setattr_args *sa;
-       int                   rc;
+       int rc;
  
         req = ptlrpc_request_alloc(class_exp2cliimp(exp), &RQF_OST_SETATTR);
         if (req == NULL)
                 return -ENOMEM;
  
-       osc_set_capa_size(req, &RMF_CAPA1, oinfo->oi_capa);
         rc = ptlrpc_request_pack(req, LUSTRE_OST_VERSION, OST_SETATTR);
         if (rc) {
                 ptlrpc_request_free(req);
@@ -427,19 +403,19 @@ int osc_setattr_async_base(struct obd_export *exp, struct obd_info *oinfo,
         /* do mds to ost setattr asynchronously */
         if (!rqset) {
                 /* Do not wait for response. */
-               ptlrpcd_add_req(req, PDL_POLICY_ROUND, -1);
+               ptlrpcd_add_req(req);
         } else {
                 req->rq_interpret_reply =
                         (ptlrpc_interpterer_t)osc_setattr_interpret;
  
-               CLASSERT (sizeof(*sa) <= sizeof(req->rq_async_args));
+               CLASSERT(sizeof(*sa) <= sizeof(req->rq_async_args));
                 sa = ptlrpc_req_async_args(req);
                 sa->sa_oa = oinfo->oi_oa;
                 sa->sa_upcall = upcall;
                 sa->sa_cookie = cookie;
  
                 if (rqset == PTLRPCD_SET)
-                       ptlrpcd_add_req(req, PDL_POLICY_ROUND, -1);
+                       ptlrpcd_add_req(req);
                 else
                         ptlrpc_set_add_req(rqset, req);
         }
@@ -459,9 +435,9 @@ int osc_real_create(struct obd_export *exp, struct obdo *oa,
                     struct lov_stripe_md **ea, struct obd_trans_info *oti)
  {
         struct ptlrpc_request *req;
-       struct ost_body       *body;
-       struct lov_stripe_md  *lsm;
-       int                 rc;
+       struct ost_body *body;
+       struct lov_stripe_md *lsm;
+       int rc;
  
         LASSERT(oa);
         LASSERT(ea);
@@ -547,16 +523,15 @@ int osc_punch_base(struct obd_export *exp, struct obd_info *oinfo,
                    obd_enqueue_update_f upcall, void *cookie,
                    struct ptlrpc_request_set *rqset)
  {
-       struct ptlrpc_request   *req;
+       struct ptlrpc_request *req;
         struct osc_setattr_args *sa;
-       struct ost_body  *body;
-       int                   rc;
+       struct ost_body *body;
+       int rc;
  
         req = ptlrpc_request_alloc(class_exp2cliimp(exp), &RQF_OST_PUNCH);
         if (req == NULL)
                 return -ENOMEM;
  
-       osc_set_capa_size(req, &RMF_CAPA1, oinfo->oi_capa);
         rc = ptlrpc_request_pack(req, LUSTRE_OST_VERSION, OST_PUNCH);
         if (rc) {
                 ptlrpc_request_free(req);
@@ -569,18 +544,17 @@ int osc_punch_base(struct obd_export *exp, struct obd_info *oinfo,
         LASSERT(body);
         lustre_set_wire_obdo(&req->rq_import->imp_connect_data, &body->oa,
                              oinfo->oi_oa);
-       osc_pack_capa(req, body, oinfo->oi_capa);
  
         ptlrpc_request_set_replen(req);
  
         req->rq_interpret_reply = (ptlrpc_interpterer_t)osc_setattr_interpret;
-       CLASSERT (sizeof(*sa) <= sizeof(req->rq_async_args));
+       CLASSERT(sizeof(*sa) <= sizeof(req->rq_async_args));
         sa = ptlrpc_req_async_args(req);
-       sa->sa_oa     = oinfo->oi_oa;
+       sa->sa_oa = oinfo->oi_oa;
         sa->sa_upcall = upcall;
         sa->sa_cookie = cookie;
         if (rqset == PTLRPCD_SET)
-               ptlrpcd_add_req(req, PDL_POLICY_ROUND, -1);
+               ptlrpcd_add_req(req);
         else
                 ptlrpc_set_add_req(rqset, req);
  
@@ -599,7 +573,7 @@ static int osc_sync_interpret(const struct lu_env *env,
  
         body = req_capsule_server_get(&req->rq_pill, &RMF_OST_BODY);
         if (body == NULL) {
-               CERROR ("can't unpack ost_body\n");
+               CERROR("can't unpack ost_body\n");
                 rc = -EPROTO;
                 goto out;
         }
@@ -615,15 +589,14 @@ int osc_sync_base(struct obd_export *exp, struct obd_info *oinfo,
                   struct ptlrpc_request_set *rqset)
  {
         struct ptlrpc_request *req;
-       struct ost_body       *body;
+       struct ost_body *body;
         struct osc_fsync_args *fa;
-       int                 rc;
+       int rc;
  
         req = ptlrpc_request_alloc(class_exp2cliimp(exp), &RQF_OST_SYNC);
         if (req == NULL)
                 return -ENOMEM;
  
-       osc_set_capa_size(req, &RMF_CAPA1, oinfo->oi_capa);
         rc = ptlrpc_request_pack(req, LUSTRE_OST_VERSION, OST_SYNC);
         if (rc) {
                 ptlrpc_request_free(req);
@@ -635,7 +608,6 @@ int osc_sync_base(struct obd_export *exp, struct obd_info *oinfo,
         LASSERT(body);
         lustre_set_wire_obdo(&req->rq_import->imp_connect_data, &body->oa,
                              oinfo->oi_oa);
-       osc_pack_capa(req, body, oinfo->oi_capa);
  
         ptlrpc_request_set_replen(req);
         req->rq_interpret_reply = osc_sync_interpret;
@@ -647,7 +619,7 @@ int osc_sync_base(struct obd_export *exp, struct obd_info *oinfo,
         fa->fa_cookie = cookie;
  
         if (rqset == PTLRPCD_SET)
-               ptlrpcd_add_req(req, PDL_POLICY_ROUND, -1);
+               ptlrpcd_add_req(req);
         else
                 ptlrpc_set_add_req(rqset, req);
  
@@ -753,12 +725,11 @@ int osc_create(const struct lu_env *env, struct obd_export *exp,
   * cookies to the MDS after committing destroy transactions. */
  static int osc_destroy(const struct lu_env *env, struct obd_export *exp,
                        struct obdo *oa, struct lov_stripe_md *ea,
-                      struct obd_trans_info *oti, struct obd_export *md_export,
-                      void *capa)
+                      struct obd_trans_info *oti, struct obd_export *md_export)
  {
-       struct client_obd     *cli = &exp->exp_obd->u.cli;
+       struct client_obd *cli = &exp->exp_obd->u.cli;
         struct ptlrpc_request *req;
-       struct ost_body       *body;
+       struct ost_body *body;
         LIST_HEAD(cancels);
         int rc, count;
  
@@ -776,7 +747,6 @@ static int osc_destroy(const struct lu_env *env, struct obd_export *exp,
                 return -ENOMEM;
         }
  
-       osc_set_capa_size(req, &RMF_CAPA1, (struct obd_capa *)capa);
         rc = ldlm_prep_elc_req(exp, req, LUSTRE_OST_VERSION, OST_DESTROY,
                                0, &cancels, count);
         if (rc) {
@@ -793,7 +763,6 @@ static int osc_destroy(const struct lu_env *env, struct obd_export *exp,
         LASSERT(body);
         lustre_set_wire_obdo(&req->rq_import->imp_connect_data, &body->oa, oa);
  
-       osc_pack_capa(req, body, (struct obd_capa *)capa);
         ptlrpc_request_set_replen(req);
  
         /* If osc_destroy is for destroying the unlink orphan,
@@ -816,7 +785,7 @@ static int osc_destroy(const struct lu_env *env, struct obd_export *exp,
         }
  
         /* Do not wait for response */
-       ptlrpcd_add_req(req, PDL_POLICY_ROUND, -1);
+       ptlrpcd_add_req(req);
         return 0;
  }
  
@@ -909,7 +878,7 @@ static int osc_shrink_grant_interpret(const struct lu_env *env,
         LASSERT(body);
         osc_update_grant(cli, body);
  out:
-       OBDO_FREE(oa);
+       kmem_cache_free(obdo_cachep, oa);
         return rc;
  }
  
@@ -946,7 +915,7 @@ static int osc_shrink_grant(struct client_obd *cli)
  
  int osc_shrink_grant_to_target(struct client_obd *cli, __u64 target_bytes)
  {
-       int                     rc = 0;
+       int rc = 0;
         struct ost_body *body;
  
         client_obd_list_lock(&cli->cl_loi_list_lock);
@@ -962,7 +931,7 @@ int osc_shrink_grant_to_target(struct client_obd *cli, __u64 target_bytes)
         }
         client_obd_list_unlock(&cli->cl_loi_list_lock);
  
-       OBD_ALLOC_PTR(body);
+       body = kzalloc(sizeof(*body), GFP_NOFS);
         if (!body)
                 return -ENOMEM;
  
@@ -984,7 +953,7 @@ int osc_shrink_grant_to_target(struct client_obd *cli, __u64 target_bytes)
                                 sizeof(*body), body, NULL);
         if (rc != 0)
                 __osc_update_grant(cli, body->oa.o_grant);
-       OBD_FREE_PTR(body);
+       kfree(body);
         return rc;
  }
  
@@ -1006,8 +975,8 @@ static int osc_should_shrink_grant(struct client_obd *client)
                 if (client->cl_import->imp_state == LUSTRE_IMP_FULL &&
                     client->cl_avail_grant > brw_size)
                         return 1;
-               else
-                       osc_update_next_shrink(client);
+
+               osc_update_next_shrink(client);
         }
         return 0;
  }
@@ -1099,7 +1068,7 @@ static void handle_short_read(int nob_read, u32 page_count,
  
         /* skip bytes read OK */
         while (nob_read > 0) {
-               LASSERT (page_count > 0);
+               LASSERT(page_count > 0);
  
                 if (pga[i]->count > nob_read) {
                         /* EOF inside this page */
@@ -1130,8 +1099,8 @@ static int check_write_rcs(struct ptlrpc_request *req,
                            int requested_nob, int niocount,
                            u32 page_count, struct brw_page **pga)
  {
-       int     i;
-       __u32   *remote_rcs;
+       int i;
+       __u32 *remote_rcs;
  
         remote_rcs = req_capsule_server_sized_get(&req->rq_pill, &RMF_RCS,
                                                   sizeof(*remote_rcs) *
@@ -1181,15 +1150,15 @@ static inline int can_merge_pages(struct brw_page *p1, struct brw_page *p2)
  }
  
  static u32 osc_checksum_bulk(int nob, u32 pg_count,
-                                  struct brw_page **pga, int opc,
-                                  cksum_type_t cksum_type)
+                            struct brw_page **pga, int opc,
+                            cksum_type_t cksum_type)
  {
-       __u32                           cksum;
-       int                             i = 0;
-       struct cfs_crypto_hash_desc     *hdesc;
-       unsigned int                    bufsize;
-       int                             err;
-       unsigned char                   cfs_alg = cksum_obd2cfs(cksum_type);
+       __u32 cksum;
+       int i = 0;
+       struct cfs_crypto_hash_desc *hdesc;
+       unsigned int bufsize;
+       int err;
+       unsigned char cfs_alg = cksum_obd2cfs(cksum_type);
  
         LASSERT(pg_count > 0);
  
@@ -1209,6 +1178,7 @@ static u32 osc_checksum_bulk(int nob, u32 pg_count,
                     OBD_FAIL_CHECK(OBD_FAIL_OSC_CHECKSUM_RECEIVE)) {
                         unsigned char *ptr = kmap(pga[i]->pg);
                         int off = pga[i]->off & ~CFS_PAGE_MASK;
+
                         memcpy(ptr + off, "bad1", min(4, nob));
                         kunmap(pga[i]->pg);
                 }
@@ -1246,17 +1216,17 @@ static int osc_brw_prep_request(int cmd, struct client_obd *cli,
                                 struct lov_stripe_md *lsm, u32 page_count,
                                 struct brw_page **pga,
                                 struct ptlrpc_request **reqp,
-                               struct obd_capa *ocapa, int reserve,
+                               int reserve,
                                 int resend)
  {
-       struct ptlrpc_request   *req;
+       struct ptlrpc_request *req;
         struct ptlrpc_bulk_desc *desc;
-       struct ost_body  *body;
-       struct obd_ioobj        *ioobj;
-       struct niobuf_remote    *niobuf;
+       struct ost_body *body;
+       struct obd_ioobj *ioobj;
+       struct niobuf_remote *niobuf;
         int niocount, i, requested_nob, opc, rc;
         struct osc_brw_async_args *aa;
-       struct req_capsule      *pill;
+       struct req_capsule *pill;
         struct brw_page *pg_prev;
  
         if (OBD_FAIL_CHECK(OBD_FAIL_OSC_BRW_PREP_REQ))
@@ -1267,7 +1237,7 @@ static int osc_brw_prep_request(int cmd, struct client_obd *cli,
         if ((cmd & OBD_BRW_WRITE) != 0) {
                 opc = OST_WRITE;
                 req = ptlrpc_request_alloc_pool(cli->cl_import,
-                                               cli->cl_import->imp_rq_pool,
+                                               osc_rq_pool,
                                                 &RQF_OST_BRW_WRITE);
         } else {
                 opc = OST_READ;
@@ -1286,7 +1256,6 @@ static int osc_brw_prep_request(int cmd, struct client_obd *cli,
                              sizeof(*ioobj));
         req_capsule_set_size(pill, &RMF_NIOBUF_REMOTE, RCL_CLIENT,
                              niocount * sizeof(*niobuf));
-       osc_set_capa_size(req, &RMF_CAPA1, ocapa);
  
         rc = ptlrpc_request_pack(req, LUSTRE_OST_VERSION, opc);
         if (rc) {
@@ -1325,7 +1294,6 @@ static int osc_brw_prep_request(int cmd, struct client_obd *cli,
          * "max - 1" for old client compatibility sending "0", and also so the
          * the actual maximum is a power-of-two number, not one less. LU-1431 */
         ioobj_max_brw_set(ioobj, desc->bd_md_max_brw);
-       osc_pack_capa(req, body, ocapa);
         LASSERT(page_count > 0);
         pg_prev = pga[0];
         for (requested_nob = i = 0; i < page_count; i++, niobuf++) {
@@ -1358,8 +1326,8 @@ static int osc_brw_prep_request(int cmd, struct client_obd *cli,
                         niobuf->len += pg->count;
                 } else {
                         niobuf->offset = pg->off;
-                       niobuf->len    = pg->count;
-                       niobuf->flags  = pg->flag;
+                       niobuf->len = pg->count;
+                       niobuf->flags = pg->flag;
                 }
                 pg_prev = pg;
         }
@@ -1434,8 +1402,6 @@ static int osc_brw_prep_request(int cmd, struct client_obd *cli,
         aa->aa_ppga = pga;
         aa->aa_cli = cli;
         INIT_LIST_HEAD(&aa->aa_oaps);
-       if (ocapa && reserve)
-               aa->aa_ocapa = capa_get(ocapa);
  
         *reqp = req;
         return 0;
@@ -1570,7 +1536,7 @@ static int osc_brw_fini_request(struct ptlrpc_request *req, int rc)
         }
  
         if (rc != req->rq_bulk->bd_nob_transferred) {
-               CERROR ("Unexpected rc %d (%d transferred)\n",
+               CERROR("Unexpected rc %d (%d transferred)\n",
                         rc, req->rq_bulk->bd_nob_transferred);
                 return -EPROTO;
         }
@@ -1580,20 +1546,18 @@ static int osc_brw_fini_request(struct ptlrpc_request *req, int rc)
  
         if (body->oa.o_valid & OBD_MD_FLCKSUM) {
                 static int cksum_counter;
-               __u32      server_cksum = body->oa.o_cksum;
-               char      *via;
-               char      *router;
+               __u32 server_cksum = body->oa.o_cksum;
+               char *via = "";
+               char *router = "";
                 cksum_type_t cksum_type;
  
-               cksum_type = cksum_type_unpack(body->oa.o_valid &OBD_MD_FLFLAGS?
+               cksum_type = cksum_type_unpack(body->oa.o_valid&OBD_MD_FLFLAGS ?
                                                body->oa.o_flags : 0);
                 client_cksum = osc_checksum_bulk(rc, aa->aa_page_count,
                                                  aa->aa_ppga, OST_READ,
                                                  cksum_type);
  
-               if (peer->nid == req->rq_bulk->bd_sender) {
-                       via = router = "";
-               } else {
+               if (peer->nid != req->rq_bulk->bd_sender) {
                         via = " via ";
                         router = libcfs_nid2str(req->rq_bulk->bd_sender);
                 }
@@ -1653,11 +1617,11 @@ static int osc_brw_redo_request(struct ptlrpc_request *request,
                   "redo for recoverable error %d", rc);
  
         rc = osc_brw_prep_request(lustre_msg_get_opc(request->rq_reqmsg) ==
-                                       OST_WRITE ? OBD_BRW_WRITE :OBD_BRW_READ,
+                                       OST_WRITE ? OBD_BRW_WRITE : OBD_BRW_READ,
                                   aa->aa_cli, aa->aa_oa,
                                   NULL /* lsm unused by osc currently */,
                                   aa->aa_page_count, aa->aa_ppga,
-                                 &new_req, aa->aa_ocapa, 0, 1);
+                                 &new_req, 0, 1);
         if (rc)
                 return rc;
  
@@ -1680,9 +1644,9 @@ static int osc_brw_redo_request(struct ptlrpc_request *request,
         /* cap resend delay to the current request timeout, this is similar to
          * what ptlrpc does (see after_reply()) */
         if (aa->aa_resends > new_req->rq_timeout)
-               new_req->rq_sent = get_seconds() + new_req->rq_timeout;
+               new_req->rq_sent = ktime_get_real_seconds() + new_req->rq_timeout;
         else
-               new_req->rq_sent = get_seconds() + aa->aa_resends;
+               new_req->rq_sent = ktime_get_real_seconds() + aa->aa_resends;
         new_req->rq_generation_set = 1;
         new_req->rq_import_generation = request->rq_import_generation;
  
@@ -1701,14 +1665,11 @@ static int osc_brw_redo_request(struct ptlrpc_request *request,
                 }
         }
  
-       new_aa->aa_ocapa = aa->aa_ocapa;
-       aa->aa_ocapa = NULL;
-
         /* XXX: This code will run into problem if we're going to support
          * to add a series of BRW RPCs into a self-defined ptlrpc_request_set
          * and wait for all of them to be finished. We should inherit request
          * set from old request. */
-       ptlrpcd_add_req(new_req, PDL_POLICY_SAME, -1);
+       ptlrpcd_add_req(new_req);
  
         DEBUG_REQ(D_INFO, new_req, "new request");
         return 0;
@@ -1748,7 +1709,7 @@ static void sort_brw_pages(struct brw_page **array, int num)
  static void osc_release_ppga(struct brw_page **ppga, u32 count)
  {
         LASSERT(ppga != NULL);
-       OBD_FREE(ppga, sizeof(*ppga) * count);
+       kfree(ppga);
  }
  
  static int brw_interpret(const struct lu_env *env,
@@ -1757,7 +1718,7 @@ static int brw_interpret(const struct lu_env *env,
         struct osc_brw_async_args *aa = data;
         struct osc_extent *ext;
         struct osc_extent *tmp;
-       struct cl_object  *obj = NULL;
+       struct cl_object *obj = NULL;
         struct client_obd *cli = aa->aa_cli;
  
         rc = osc_brw_fini_request(req, rc);
@@ -1785,11 +1746,6 @@ static int brw_interpret(const struct lu_env *env,
                         rc = -EIO;
         }
  
-       if (aa->aa_ocapa) {
-               capa_put(aa->aa_ocapa);
-               aa->aa_ocapa = NULL;
-       }
-
         list_for_each_entry_safe(ext, tmp, &aa->aa_exts, oe_link) {
                 if (obj == NULL && rc == 0) {
                         obj = osc2cl(ext->oe_obj);
@@ -1831,7 +1787,7 @@ static int brw_interpret(const struct lu_env *env,
                 }
                 cl_object_put(env, obj);
         }
-       OBDO_FREE(aa->aa_oa);
+       kmem_cache_free(obdo_cachep, aa->aa_oa);
  
         cl_req_completion(env, aa->aa_clerq, rc < 0 ? rc :
                           req->rq_bulk->bd_nob_transferred);
@@ -1849,7 +1805,7 @@ static int brw_interpret(const struct lu_env *env,
         osc_wake_cache_waiters(cli);
         client_obd_list_unlock(&cli->cl_loi_list_lock);
  
-       osc_io_unplug(env, cli, NULL, PDL_POLICY_SAME);
+       osc_io_unplug(env, cli, NULL);
         return rc;
  }
  
@@ -1859,28 +1815,27 @@ static int brw_interpret(const struct lu_env *env,
   * Extents in the list must be in OES_RPC state.
   */
  int osc_build_rpc(const struct lu_env *env, struct client_obd *cli,
-                 struct list_head *ext_list, int cmd, pdl_policy_t pol)
-{
-       struct ptlrpc_request           *req = NULL;
-       struct osc_extent               *ext;
-       struct brw_page                 **pga = NULL;
-       struct osc_brw_async_args       *aa = NULL;
-       struct obdo                     *oa = NULL;
-       struct osc_async_page           *oap;
-       struct osc_async_page           *tmp;
-       struct cl_req                   *clerq = NULL;
-       enum cl_req_type                crt = (cmd & OBD_BRW_WRITE) ? CRT_WRITE :
-                                                                     CRT_READ;
-       struct ldlm_lock                *lock = NULL;
-       struct cl_req_attr              *crattr = NULL;
-       u64                             starting_offset = OBD_OBJECT_EOF;
-       u64                             ending_offset = 0;
-       int                             mpflag = 0;
-       int                             mem_tight = 0;
-       int                             page_count = 0;
-       int                             i;
-       int                             rc;
-       struct ost_body                 *body;
+                 struct list_head *ext_list, int cmd)
+{
+       struct ptlrpc_request *req = NULL;
+       struct osc_extent *ext;
+       struct brw_page **pga = NULL;
+       struct osc_brw_async_args *aa = NULL;
+       struct obdo *oa = NULL;
+       struct osc_async_page *oap;
+       struct osc_async_page *tmp;
+       struct cl_req *clerq = NULL;
+       enum cl_req_type crt = (cmd & OBD_BRW_WRITE) ? CRT_WRITE : CRT_READ;
+       struct ldlm_lock *lock = NULL;
+       struct cl_req_attr *crattr = NULL;
+       u64 starting_offset = OBD_OBJECT_EOF;
+       u64 ending_offset = 0;
+       int mpflag = 0;
+       int mem_tight = 0;
+       int page_count = 0;
+       int i;
+       int rc;
+       struct ost_body *body;
         LIST_HEAD(rpc_list);
  
         LASSERT(!list_empty(ext_list));
@@ -1908,19 +1863,19 @@ int osc_build_rpc(const struct lu_env *env, struct client_obd *cli,
         if (mem_tight)
                 mpflag = cfs_memory_pressure_get_and_set();
  
-       OBD_ALLOC(crattr, sizeof(*crattr));
-       if (crattr == NULL) {
+       crattr = kzalloc(sizeof(*crattr), GFP_NOFS);
+       if (!crattr) {
                 rc = -ENOMEM;
                 goto out;
         }
  
-       OBD_ALLOC(pga, sizeof(*pga) * page_count);
+       pga = kcalloc(page_count, sizeof(*pga), GFP_NOFS);
         if (pga == NULL) {
                 rc = -ENOMEM;
                 goto out;
         }
  
-       OBDO_ALLOC(oa);
+       oa = kmem_cache_alloc(obdo_cachep, GFP_NOFS | __GFP_ZERO);
         if (oa == NULL) {
                 rc = -ENOMEM;
                 goto out;
@@ -1929,6 +1884,7 @@ int osc_build_rpc(const struct lu_env *env, struct client_obd *cli,
         i = 0;
         list_for_each_entry(oap, &rpc_list, oap_rpc_item) {
                 struct cl_page *page = oap2cl_page(oap);
+
                 if (clerq == NULL) {
                         clerq = cl_req_alloc(env, page, crt,
                                              1 /* only 1-object rpcs for now */);
@@ -1966,7 +1922,7 @@ int osc_build_rpc(const struct lu_env *env, struct client_obd *cli,
  
         sort_brw_pages(pga, page_count);
         rc = osc_brw_prep_request(cmd, cli, oa, NULL, page_count,
-                       pga, &req, crattr->cra_capa, 1, 0);
+                       pga, &req, 1, 0);
         if (rc != 0) {
                 CERROR("prep_req failed: %d\n", rc);
                 goto out;
@@ -2034,37 +1990,21 @@ int osc_build_rpc(const struct lu_env *env, struct client_obd *cli,
                   page_count, aa, cli->cl_r_in_flight,
                   cli->cl_w_in_flight);
  
-       /* XXX: Maybe the caller can check the RPC bulk descriptor to
-        * see which CPU/NUMA node the majority of pages were allocated
-        * on, and try to assign the async RPC to the CPU core
-        * (PDL_POLICY_PREFERRED) to reduce cross-CPU memory traffic.
-        *
-        * But on the other hand, we expect that multiple ptlrpcd
-        * threads and the initial write sponsor can run in parallel,
-        * especially when data checksum is enabled, which is CPU-bound
-        * operation and single ptlrpcd thread cannot process in time.
-        * So more ptlrpcd threads sharing BRW load
-        * (with PDL_POLICY_ROUND) seems better.
-        */
-       ptlrpcd_add_req(req, pol, -1);
+       ptlrpcd_add_req(req);
         rc = 0;
  
  out:
         if (mem_tight != 0)
                 cfs_memory_pressure_restore(mpflag);
  
-       if (crattr != NULL) {
-               capa_put(crattr->cra_capa);
-               OBD_FREE(crattr, sizeof(*crattr));
-       }
+       kfree(crattr);
  
         if (rc != 0) {
                 LASSERT(req == NULL);
  
                 if (oa)
-                       OBDO_FREE(oa);
-               if (pga)
-                       OBD_FREE(pga, sizeof(*pga) * page_count);
+                       kmem_cache_free(obdo_cachep, oa);
+               kfree(pga);
                 /* this should happen rarely and is pretty bad, it makes the
                  * pending list not follow the dirty order */
                 while (!list_empty(ext_list)) {
@@ -2150,6 +2090,7 @@ static int osc_enqueue_fini(struct ptlrpc_request *req, struct ost_lvb *lvb,
                 /* The request was created before ldlm_cli_enqueue call. */
                 if (rc == ELDLM_LOCK_ABORTED) {
                         struct ldlm_reply *rep;
+
                         rep = req_capsule_server_get(&req->rq_pill,
                                                      &RMF_DLM_REP);
  
@@ -2300,7 +2241,9 @@ int osc_enqueue_base(struct obd_export *exp, struct ldlm_res_id *res_id,
                         ldlm_lock_decref(lockh, mode);
                         LDLM_LOCK_PUT(matched);
                         return -ECANCELED;
-               } else if (osc_set_lock_data_with_check(matched, einfo)) {
+               }
+
+               if (osc_set_lock_data_with_check(matched, einfo)) {
                         *flags |= LDLM_FL_LVB_READY;
                         /* addref the lock only if not async requests and PW
                          * lock is matched whereas we asked for PR. */
@@ -2325,15 +2268,16 @@ int osc_enqueue_base(struct obd_export *exp, struct ldlm_res_id *res_id,
                                 ldlm_lock_decref(lockh, einfo->ei_mode);
                         LDLM_LOCK_PUT(matched);
                         return ELDLM_OK;
-               } else {
-                       ldlm_lock_decref(lockh, mode);
-                       LDLM_LOCK_PUT(matched);
                 }
+
+               ldlm_lock_decref(lockh, mode);
+               LDLM_LOCK_PUT(matched);
         }
  
   no_match:
         if (intent) {
                 LIST_HEAD(cancels);
+
                 req = ptlrpc_request_alloc(class_exp2cliimp(exp),
                                            &RQF_LDLM_ENQUEUE_LVB);
                 if (req == NULL)
@@ -2358,6 +2302,7 @@ int osc_enqueue_base(struct obd_export *exp, struct ldlm_res_id *res_id,
         if (rqset) {
                 if (!rc) {
                         struct osc_enqueue_args *aa;
+
                         CLASSERT (sizeof(*aa) <= sizeof(req->rq_async_args));
                         aa = ptlrpc_req_async_args(req);
                         aa->oa_ei = einfo;
@@ -2372,7 +2317,7 @@ int osc_enqueue_base(struct obd_export *exp, struct ldlm_res_id *res_id,
                         req->rq_interpret_reply =
                                 (ptlrpc_interpterer_t)osc_enqueue_interpret;
                         if (rqset == PTLRPCD_SET)
-                               ptlrpcd_add_req(req, PDL_POLICY_ROUND, -1);
+                               ptlrpcd_add_req(req);
                         else
                                 ptlrpc_set_add_req(rqset, req);
                 } else if (intent) {
@@ -2480,10 +2425,10 @@ static int osc_statfs_async(struct obd_export *exp,
                             struct obd_info *oinfo, __u64 max_age,
                             struct ptlrpc_request_set *rqset)
  {
-       struct obd_device     *obd = class_exp2obd(exp);
+       struct obd_device *obd = class_exp2obd(exp);
         struct ptlrpc_request *req;
         struct osc_async_args *aa;
-       int                 rc;
+       int rc;
  
         /* We could possibly pass max_age in the request (as an absolute
          * timestamp or a "seconds.usec ago") so the target can avoid doing
@@ -2522,10 +2467,10 @@ static int osc_statfs_async(struct obd_export *exp,
  static int osc_statfs(const struct lu_env *env, struct obd_export *exp,
                       struct obd_statfs *osfs, __u64 max_age, __u32 flags)
  {
-       struct obd_device     *obd = class_exp2obd(exp);
-       struct obd_statfs     *msfs;
+       struct obd_device *obd = class_exp2obd(exp);
+       struct obd_statfs *msfs;
         struct ptlrpc_request *req;
-       struct obd_import     *imp = NULL;
+       struct obd_import *imp = NULL;
         int rc;
  
         /*Since the request might also come from lprocfs, so we need
@@ -2617,7 +2562,7 @@ static int osc_getstripe(struct lov_stripe_md *lsm, struct lov_user_md *lump)
          * because lov_user_md_vX and lov_mds_md_vX have the same size */
         if (lum.lmm_stripe_count > 0) {
                 lum_size = lov_mds_md_size(lum.lmm_stripe_count, lum.lmm_magic);
-               OBD_ALLOC(lumk, lum_size);
+               lumk = kzalloc(lum_size, GFP_NOFS);
                 if (!lumk)
                         return -ENOMEM;
  
@@ -2639,12 +2584,11 @@ static int osc_getstripe(struct lov_stripe_md *lsm, struct lov_user_md *lump)
                 rc = -EFAULT;
  
         if (lumk != &lum)
-               OBD_FREE(lumk, lum_size);
+               kfree(lumk);
  
         return rc;
  }
  
-
  static int osc_iocontrol(unsigned int cmd, struct obd_export *exp, int len,
                          void *karg, void *uarg)
  {
@@ -2664,7 +2608,7 @@ static int osc_iocontrol(unsigned int cmd, struct obd_export *exp, int len,
  
                 buf = NULL;
                 len = 0;
-               if (obd_ioctl_getdata(&buf, &len, (void *)uarg)) {
+               if (obd_ioctl_getdata(&buf, &len, uarg)) {
                         err = -EINVAL;
                         goto out;
                 }
@@ -2694,7 +2638,7 @@ static int osc_iocontrol(unsigned int cmd, struct obd_export *exp, int len,
  
                 memcpy(data->ioc_inlbuf2, &obd->obd_uuid, sizeof(uuid));
  
-               err = copy_to_user((void *)uarg, buf, len);
+               err = copy_to_user(uarg, buf, len);
                 if (err)
                         err = -EFAULT;
                 obd_ioctl_freedata(buf, len);
@@ -2719,7 +2663,7 @@ static int osc_iocontrol(unsigned int cmd, struct obd_export *exp, int len,
                                                data->ioc_offset);
                 goto out;
         case OBD_IOC_POLL_QUOTACHECK:
-               err = osc_quota_poll_check(exp, (struct if_quotacheck *)karg);
+               err = osc_quota_poll_check(exp, karg);
                 goto out;
         case OBD_IOC_PING_TARGET:
                 err = ptlrpc_obd_ping(obd);
@@ -2749,9 +2693,9 @@ static int osc_get_info(const struct lu_env *env, struct obd_export *exp,
                 return 0;
         } else if (KEY_IS(KEY_LAST_ID)) {
                 struct ptlrpc_request *req;
-               u64             *reply;
-               char              *tmp;
-               int                 rc;
+               u64 *reply;
+               char *tmp;
+               int rc;
  
                 req = ptlrpc_request_alloc(class_exp2cliimp(exp),
                                            &RQF_OST_GET_INFO_LAST_ID);
@@ -2786,16 +2730,15 @@ static int osc_get_info(const struct lu_env *env, struct obd_export *exp,
                 ptlrpc_req_finished(req);
                 return rc;
         } else if (KEY_IS(KEY_FIEMAP)) {
-               struct ll_fiemap_info_key *fm_key =
-                               (struct ll_fiemap_info_key *)key;
-               struct ldlm_res_id       res_id;
-               ldlm_policy_data_t       policy;
-               struct lustre_handle     lockh;
-               ldlm_mode_t              mode = 0;
-               struct ptlrpc_request   *req;
-               struct ll_user_fiemap   *reply;
-               char                    *tmp;
-               int                      rc;
+               struct ll_fiemap_info_key *fm_key = key;
+               struct ldlm_res_id res_id;
+               ldlm_policy_data_t policy;
+               struct lustre_handle lockh;
+               ldlm_mode_t mode = 0;
+               struct ptlrpc_request *req;
+               struct ll_user_fiemap *reply;
+               char *tmp;
+               int rc;
  
                 if (!(fm_key->fiemap.fm_flags & FIEMAP_FLAG_SYNC))
                         goto skip_locking;
@@ -2881,10 +2824,10 @@ static int osc_set_info_async(const struct lu_env *env, struct obd_export *exp,
                               void *val, struct ptlrpc_request_set *set)
  {
         struct ptlrpc_request *req;
-       struct obd_device     *obd = exp->exp_obd;
-       struct obd_import     *imp = class_exp2cliimp(exp);
-       char              *tmp;
-       int                 rc;
+       struct obd_device *obd = exp->exp_obd;
+       struct obd_import *imp = class_exp2cliimp(exp);
+       char *tmp;
+       int rc;
  
         OBD_FAIL_TIMEOUT(OBD_FAIL_OSC_SHUTDOWN, 10);
  
@@ -2909,7 +2852,7 @@ static int osc_set_info_async(const struct lu_env *env, struct obd_export *exp,
                 struct client_obd *cli = &obd->u.cli;
  
                 LASSERT(cli->cl_cache == NULL); /* only once */
-               cli->cl_cache = (struct cl_client_cache *)val;
+               cli->cl_cache = val;
                 atomic_inc(&cli->cl_cache->ccc_users);
                 cli->cl_lru_left = &cli->cl_cache->ccc_lru_left;
  
@@ -2972,7 +2915,7 @@ static int osc_set_info_async(const struct lu_env *env, struct obd_export *exp,
  
                 CLASSERT(sizeof(*aa) <= sizeof(req->rq_async_args));
                 aa = ptlrpc_req_async_args(req);
-               OBDO_ALLOC(oa);
+               oa = kmem_cache_alloc(obdo_cachep, GFP_NOFS | __GFP_ZERO);
                 if (!oa) {
                         ptlrpc_req_finished(req);
                         return -ENOMEM;
@@ -2987,8 +2930,9 @@ static int osc_set_info_async(const struct lu_env *env, struct obd_export *exp,
                 LASSERT(set != NULL);
                 ptlrpc_set_add_req(set, req);
                 ptlrpc_check_set(NULL, set);
-       } else
-               ptlrpcd_add_req(req, PDL_POLICY_ROUND, -1);
+       } else {
+               ptlrpcd_add_req(req);
+       }
  
         return 0;
  }
@@ -3071,8 +3015,8 @@ static int osc_import_event(struct obd_device *obd,
         }
         case IMP_EVENT_INVALIDATE: {
                 struct ldlm_namespace *ns = obd->obd_namespace;
-               struct lu_env    *env;
-               int                 refcheck;
+               struct lu_env *env;
+               int refcheck;
  
                 env = cl_env_get(&refcheck);
                 if (!IS_ERR(env)) {
@@ -3080,7 +3024,7 @@ static int osc_import_event(struct obd_device *obd,
                         cli = &obd->u.cli;
                         /* all pages go to failing rpcs due to the invalid
                          * import */
-                       osc_io_unplug(env, cli, NULL, PDL_POLICY_ROUND);
+                       osc_io_unplug(env, cli, NULL);
  
                         ldlm_namespace_cleanup(ns, LDLM_FL_LOCAL_ONLY);
                         cl_env_put(env, &refcheck);
@@ -3100,7 +3044,7 @@ static int osc_import_event(struct obd_device *obd,
  
                 /* See bug 7198 */
                 if (ocd->ocd_connect_flags & OBD_CONNECT_REQPORTAL)
-                       imp->imp_client->cli_request_portal =OST_REQUEST_PORTAL;
+                       imp->imp_client->cli_request_portal = OST_REQUEST_PORTAL;
  
                 rc = obd_notify_observer(obd, obd, OBD_NOTIFY_OCD, NULL);
                 break;
@@ -3152,16 +3096,19 @@ static int brw_queue_work(const struct lu_env *env, void *data)
  
         CDEBUG(D_CACHE, "Run writeback work for client obd %p.\n", cli);
  
-       osc_io_unplug(env, cli, NULL, PDL_POLICY_SAME);
+       osc_io_unplug(env, cli, NULL);
         return 0;
  }
  
  int osc_setup(struct obd_device *obd, struct lustre_cfg *lcfg)
  {
         struct lprocfs_static_vars lvars = { NULL };
-       struct client_obd         *cli = &obd->u.cli;
-       void                   *handler;
-       int                     rc;
+       struct client_obd *cli = &obd->u.cli;
+       void *handler;
+       int rc;
+       int adding;
+       int added;
+       int req_count;
  
         rc = ptlrpcd_addref();
         if (rc)
@@ -3184,21 +3131,26 @@ int osc_setup(struct obd_device *obd, struct lustre_cfg *lcfg)
  
         cli->cl_grant_shrink_interval = GRANT_SHRINK_INTERVAL;
         lprocfs_osc_init_vars(&lvars);
-       if (lprocfs_obd_setup(obd, lvars.obd_vars) == 0) {
+       if (lprocfs_obd_setup(obd, lvars.obd_vars, lvars.sysfs_vars) == 0) {
                 lproc_osc_attach_seqstat(obd);
                 sptlrpc_lprocfs_cliobd_attach(obd);
                 ptlrpc_lprocfs_register_obd(obd);
         }
  
-       /* We need to allocate a few requests more, because
-        * brw_interpret tries to create new requests before freeing
-        * previous ones, Ideally we want to have 2x max_rpcs_in_flight
-        * reserved, but I'm afraid that might be too much wasted RAM
-        * in fact, so 2 is just my guess and still should work. */
-       cli->cl_import->imp_rq_pool =
-               ptlrpc_init_rq_pool(cli->cl_max_rpcs_in_flight + 2,
-                                   OST_MAXREQSIZE,
-                                   ptlrpc_add_rqs_to_pool);
+       /*
+        * We try to control the total number of requests with a upper limit
+        * osc_reqpool_maxreqcount. There might be some race which will cause
+        * over-limit allocation, but it is fine.
+        */
+       req_count = atomic_read(&osc_pool_req_count);
+       if (req_count < osc_reqpool_maxreqcount) {
+               adding = cli->cl_max_rpcs_in_flight + 2;
+               if (req_count + adding > osc_reqpool_maxreqcount)
+                       adding = osc_reqpool_maxreqcount - req_count;
+
+               added = ptlrpc_add_rqs_to_pool(osc_rq_pool, adding);
+               atomic_add(added, &osc_pool_req_count);
+       }
  
         INIT_LIST_HEAD(&cli->cl_grant_shrink_list);
         ns_register_cancel(obd->obd_namespace, osc_cancel_for_recovery);
@@ -3218,6 +3170,7 @@ static int osc_precleanup(struct obd_device *obd, enum obd_cleanup_stage stage)
         switch (stage) {
         case OBD_CLEANUP_EARLY: {
                 struct obd_import *imp;
+
                 imp = obd->u.cli.cl_import;
                 CDEBUG(D_HA, "Deactivating import %s\n", obd->obd_name);
                 /* ptlrpc_abort_inflight to stop an mds_lov_synchronize */
@@ -3338,6 +3291,8 @@ extern struct lock_class_key osc_ast_guard_class;
  static int __init osc_init(void)
  {
         struct lprocfs_static_vars lvars = { NULL };
+       unsigned int reqpool_size;
+       unsigned int reqsize;
         int rc;
  
         /* print an address of _any_ initialized kernel symbol from this
@@ -3351,16 +3306,47 @@ static int __init osc_init(void)
  
         lprocfs_osc_init_vars(&lvars);
  
-       rc = class_register_type(&osc_obd_ops, NULL, lvars.module_vars,
+       rc = class_register_type(&osc_obd_ops, NULL,
                                  LUSTRE_OSC_NAME, &osc_device_type);
-       if (rc) {
-               lu_kmem_fini(osc_caches);
-               return rc;
-       }
+       if (rc)
+               goto out_kmem;
  
         spin_lock_init(&osc_ast_guard);
         lockdep_set_class(&osc_ast_guard, &osc_ast_guard_class);
  
+       /* This is obviously too much memory, only prevent overflow here */
+       if (osc_reqpool_mem_max >= 1 << 12 || osc_reqpool_mem_max == 0) {
+               rc = -EINVAL;
+               goto out_type;
+       }
+
+       reqpool_size = osc_reqpool_mem_max << 20;
+
+       reqsize = 1;
+       while (reqsize < OST_MAXREQSIZE)
+               reqsize = reqsize << 1;
+
+       /*
+        * We don't enlarge the request count in OSC pool according to
+        * cl_max_rpcs_in_flight. The allocation from the pool will only be
+        * tried after normal allocation failed. So a small OSC pool won't
+        * cause much performance degression in most of cases.
+        */
+       osc_reqpool_maxreqcount = reqpool_size / reqsize;
+
+       atomic_set(&osc_pool_req_count, 0);
+       osc_rq_pool = ptlrpc_init_rq_pool(0, OST_MAXREQSIZE,
+                                         ptlrpc_add_rqs_to_pool);
+
+       if (osc_rq_pool)
+               return 0;
+
+       rc = -ENOMEM;
+
+out_type:
+       class_unregister_type(LUSTRE_OSC_NAME);
+out_kmem:
+       lu_kmem_fini(osc_caches);
         return rc;
  }
  
@@ -3368,6 +3354,7 @@ static void /*__exit*/ osc_exit(void)
  {
         class_unregister_type(LUSTRE_OSC_NAME);
         lu_kmem_fini(osc_caches);
+       ptlrpc_free_rq_pool(osc_rq_pool);
  }
  
  MODULE_AUTHOR("Sun Microsystems, Inc. <http://www.lustre.org/>");