4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License version 2 for more details. A copy is
14 * included in the COPYING file that accompanied this code.
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23 * Copyright (c) 2011 Intel Corporation
25 * Copyright 2012 Xyratex Technology Limited
30 * Network Request Scheduler (NRS)
32 * Allows reordering of the handling of RPCs at servers.
34 * Author: Liang Zhen <liang@whamcloud.com>
35 * Author: Nikitas Angelinas <nikitas_angelinas@xyratex.com>
42 #define DEBUG_SUBSYSTEM S_RPC
43 #include "../include/obd_support.h"
44 #include "../include/obd_class.h"
45 #include "../include/lustre_net.h"
46 #include "../include/lprocfs_status.h"
47 #include "../../include/linux/libcfs/libcfs.h"
48 #include "ptlrpc_internal.h"
50 /* XXX: This is just for liblustre. Remove the #if defined directive when the
51 * "cfs_" prefix is dropped from cfs_list_head. */
/* Global NRS core state; code in this file uses its nrs_mutex and nrs_policies members. */
56 struct nrs_core nrs_core;
/*
 * Runs the optional op_policy_init() hook of the policy descriptor and returns
 * its result; evaluates to 0 when the descriptor provides no such hook.
 */
58 static int nrs_policy_init(struct ptlrpc_nrs_policy *policy)
60 return policy->pol_desc->pd_ops->op_policy_init != NULL ?
61 policy->pol_desc->pd_ops->op_policy_init(policy) : 0;
/*
 * Asserts that the policy holds no usage references and no queued requests,
 * then runs the optional op_policy_fini() hook of the policy descriptor.
 */
64 static void nrs_policy_fini(struct ptlrpc_nrs_policy *policy)
66 LASSERT(policy->pol_ref == 0);
67 LASSERT(policy->pol_req_queued == 0);
69 if (policy->pol_desc->pd_ops->op_policy_fini != NULL)
70 policy->pol_desc->pd_ops->op_policy_fini(policy);
/*
 * Forwards control opcode \a opc and \a arg to the optional op_policy_ctl()
 * hook of the policy descriptor, after first checking for a policy in
 * NRS_POL_STATE_STOPPED.
 *
 * NOTE(review): the value returned for a stopped policy and the value used
 * when no hook is provided are not visible in this excerpt - confirm.
 */
73 static int nrs_policy_ctl_locked(struct ptlrpc_nrs_policy *policy,
74 enum ptlrpc_nrs_ctl opc, void *arg)
77 * The policy may be stopped, but the lprocfs files and
78 * ptlrpc_nrs_policy instances remain present until unregistration time.
79 * Do not perform the ctl operation if the policy is stopped, as
80 * policy->pol_private will be NULL in such a case.
82 if (policy->pol_state == NRS_POL_STATE_STOPPED)
85 return policy->pol_desc->pd_ops->op_policy_ctl != NULL ?
86 policy->pol_desc->pd_ops->op_policy_ctl(policy, opc, arg) :
/*
 * Completes the stop of a policy: runs the optional op_policy_stop() hook with
 * nrs_lock released around the call, asserts that nothing is queued or started
 * on the policy, clears pol_private, marks the policy NRS_POL_STATE_STOPPED,
 * and drops one pd_refs count, releasing the owner module when that count
 * reaches zero.
 *
 * NOTE(review): the unlock/lock pair around the hook implies the caller holds
 * nrs->nrs_lock on entry - confirm against callers.
 */
90 static void nrs_policy_stop0(struct ptlrpc_nrs_policy *policy)
92 struct ptlrpc_nrs *nrs = policy->pol_nrs;
94 if (policy->pol_desc->pd_ops->op_policy_stop != NULL) {
95 spin_unlock(&nrs->nrs_lock);
97 policy->pol_desc->pd_ops->op_policy_stop(policy);
99 spin_lock(&nrs->nrs_lock);
102 LASSERT(list_empty(&policy->pol_list_queued));
103 LASSERT(policy->pol_req_queued == 0 &&
104 policy->pol_req_started == 0);
106 policy->pol_private = NULL;
108 policy->pol_state = NRS_POL_STATE_STOPPED;
110 if (atomic_dec_and_test(&policy->pol_desc->pd_refs))
111 module_put(policy->pol_desc->pd_owner);
/*
 * Moves a started policy to NRS_POL_STATE_STOPPING, removes it from the
 * primary or fallback slot of its NRS head, and finishes the stop through
 * nrs_policy_stop0() when pol_ref equals 1.
 *
 * Guard conditions checked first: the policy is the fallback policy while the
 * head is not stopping, the policy is still STARTING, or the policy is not in
 * the STARTED state.
 *
 * NOTE(review): the return values of the guard branches and of the normal
 * path are not visible in this excerpt - confirm.
 */
114 static int nrs_policy_stop_locked(struct ptlrpc_nrs_policy *policy)
116 struct ptlrpc_nrs *nrs = policy->pol_nrs;
118 if (nrs->nrs_policy_fallback == policy && !nrs->nrs_stopping)
121 if (policy->pol_state == NRS_POL_STATE_STARTING)
124 /* In progress or already stopped */
125 if (policy->pol_state != NRS_POL_STATE_STARTED)
128 policy->pol_state = NRS_POL_STATE_STOPPING;
130 /* Immediately make it invisible */
131 if (nrs->nrs_policy_primary == policy) {
132 nrs->nrs_policy_primary = NULL;
135 LASSERT(nrs->nrs_policy_fallback == policy);
136 nrs->nrs_policy_fallback = NULL;
139 /* I have the only refcount */
140 if (policy->pol_ref == 1)
141 nrs_policy_stop0(policy);
147 * Transitions the \a nrs NRS head's primary policy to
148 * ptlrpc_nrs_pol_state::NRS_POL_STATE_STOPPING and if the policy has no
149 * pending usage references, to ptlrpc_nrs_pol_state::NRS_POL_STATE_STOPPED.
151 * \param[in] nrs the NRS head to carry out this operation on
/*
 * Detaches the current primary policy from \a nrs, asserts it was STARTED,
 * marks it STOPPING, and completes the stop via nrs_policy_stop0() when it has
 * no usage references (pol_ref == 0).
 *
 * NOTE(review): a check for a NULL primary policy presumably sits between the
 * first two visible statements - confirm against the full source.
 */
153 static void nrs_policy_stop_primary(struct ptlrpc_nrs *nrs)
155 struct ptlrpc_nrs_policy *tmp = nrs->nrs_policy_primary;
160 nrs->nrs_policy_primary = NULL;
162 LASSERT(tmp->pol_state == NRS_POL_STATE_STARTED);
163 tmp->pol_state = NRS_POL_STATE_STOPPING;
165 if (tmp->pol_ref == 0)
166 nrs_policy_stop0(tmp);
170 * Transitions a policy across the ptlrpc_nrs_pol_state range of values, in
171 * response to an lprocfs command to start a policy.
173 * If a primary policy different to the current one is specified, this function
174 * will transition the new policy to the
175 * ptlrpc_nrs_pol_state::NRS_POL_STATE_STARTING and then to
176 * ptlrpc_nrs_pol_state::NRS_POL_STATE_STARTED, and will then transition
177 * the old primary policy (if there is one) to
178 * ptlrpc_nrs_pol_state::NRS_POL_STATE_STOPPING, and if there are no outstanding
179 * references on the policy to ptlrpc_nrs_pol_state::NRS_POL_STATE_STOPPED.
181 * If the fallback policy is specified, this is taken to indicate an instruction
182 * to stop the current primary policy, without substituting it with another
183 * primary policy, so the primary policy (if any) is transitioned to
184 * ptlrpc_nrs_pol_state::NRS_POL_STATE_STOPPING, and if there are no outstanding
185 * references on the policy to ptlrpc_nrs_pol_state::NRS_POL_STATE_STOPPED. In
186 * this case, the fallback policy is only left active in the NRS head.
/*
 * Starts \a policy on its NRS head.
 *
 * Visible steps, in order:
 *  - check whether another start is in progress (nrs_policy_starting) and
 *    whether the policy is currently STOPPING;
 *  - for a policy flagged PTLRPC_NRS_FL_FALLBACK that already is the fallback
 *    policy, stop the primary policy instead of starting anything;
 *  - for other policies, check that a fallback policy exists and whether the
 *    policy is already STARTED;
 *  - take a pd_refs count, acquiring the owner module on the first count and
 *    logging an error if the module cannot be obtained;
 *  - set nrs_policy_starting, mark the policy STARTING and run the optional
 *    op_policy_start() hook with nrs_lock released around the call;
 *  - on the path that drops pd_refs again the policy returns to STOPPED
 *    (presumably when the hook reports failure);
 *  - otherwise mark the policy STARTED and install it as the fallback policy,
 *    or stop the old primary policy and install it as the primary one;
 *  - clear nrs_policy_starting.
 *
 * NOTE(review): the return statements, error codes and branch targets are not
 * visible in this excerpt - confirm against the full source.
 */
188 static int nrs_policy_start_locked(struct ptlrpc_nrs_policy *policy)
190 struct ptlrpc_nrs *nrs = policy->pol_nrs;
194 * Don't allow multiple starting which is too complex, and has no real
197 if (nrs->nrs_policy_starting)
200 LASSERT(policy->pol_state != NRS_POL_STATE_STARTING);
202 if (policy->pol_state == NRS_POL_STATE_STOPPING)
205 if (policy->pol_flags & PTLRPC_NRS_FL_FALLBACK) {
207 * This is for cases in which the user sets the policy to the
208 * fallback policy (currently fifo for all services); i.e. the
209 * user is resetting the policy to the default; so we stop the
210 * primary policy, if any.
212 if (policy == nrs->nrs_policy_fallback) {
213 nrs_policy_stop_primary(nrs);
218 * If we reach here, we must be setting up the fallback policy
219 * at service startup time, and only a single policy with the
220 * nrs_policy_flags::PTLRPC_NRS_FL_FALLBACK flag set can
221 * register with NRS core.
223 LASSERT(nrs->nrs_policy_fallback == NULL);
226 * Shouldn't start primary policy if w/o fallback policy.
228 if (nrs->nrs_policy_fallback == NULL)
231 if (policy->pol_state == NRS_POL_STATE_STARTED)
236 * Increase the module usage count for policies registering from other
239 if (atomic_inc_return(&policy->pol_desc->pd_refs) == 1 &&
240 !try_module_get(policy->pol_desc->pd_owner)) {
241 atomic_dec(&policy->pol_desc->pd_refs);
242 CERROR("NRS: cannot get module for policy %s; is it alive?\n",
243 policy->pol_desc->pd_name);
248 * Serialize policy starting across the NRS head
250 nrs->nrs_policy_starting = 1;
252 policy->pol_state = NRS_POL_STATE_STARTING;
254 if (policy->pol_desc->pd_ops->op_policy_start) {
255 spin_unlock(&nrs->nrs_lock);
257 rc = policy->pol_desc->pd_ops->op_policy_start(policy);
259 spin_lock(&nrs->nrs_lock);
261 if (atomic_dec_and_test(&policy->pol_desc->pd_refs))
262 module_put(policy->pol_desc->pd_owner);
264 policy->pol_state = NRS_POL_STATE_STOPPED;
269 policy->pol_state = NRS_POL_STATE_STARTED;
271 if (policy->pol_flags & PTLRPC_NRS_FL_FALLBACK) {
273 * This path is only used at PTLRPC service setup time.
275 nrs->nrs_policy_fallback = policy;
278 * Try to stop the current primary policy if there is one.
280 nrs_policy_stop_primary(nrs);
283 * And set the newly-started policy as the primary one.
285 nrs->nrs_policy_primary = policy;
289 nrs->nrs_policy_starting = 0;
295 * Increases the policy's usage reference count.
/*
 * NOTE(review): only the signature is visible in this excerpt. Callers in this
 * file pair it with nrs_policy_put_locked() and invoke it under nrs_lock;
 * the body presumably increments pol_ref - confirm.
 */
297 static inline void nrs_policy_get_locked(struct ptlrpc_nrs_policy *policy)
303 * Decreases the policy's usage reference count, and stops the policy in case it
304 * was already stopping and has no more outstanding usage references (which
305 * indicates it has no more queued or started requests, and can be safely
/*
 * Asserts that the policy still has usage references, and completes a pending
 * stop via nrs_policy_stop0() once pol_ref is 0 while the policy is in
 * NRS_POL_STATE_STOPPING.
 *
 * NOTE(review): the decrement of pol_ref presumably sits between the assertion
 * and the check; it is not visible in this excerpt - confirm.
 */
308 static void nrs_policy_put_locked(struct ptlrpc_nrs_policy *policy)
310 LASSERT(policy->pol_ref > 0);
313 if (unlikely(policy->pol_ref == 0 &&
314 policy->pol_state == NRS_POL_STATE_STOPPING))
315 nrs_policy_stop0(policy);
/*
 * Lock-taking wrapper: calls nrs_policy_put_locked() while holding the
 * nrs_lock of the NRS head the policy belongs to.
 */
318 static void nrs_policy_put(struct ptlrpc_nrs_policy *policy)
320 spin_lock(&policy->pol_nrs->nrs_lock);
321 nrs_policy_put_locked(policy);
322 spin_unlock(&policy->pol_nrs->nrs_lock);
326 * Find and return a policy by name.
/*
 * Walks nrs_policy_list of \a nrs and compares each descriptor name with
 * \a name over at most NRS_POL_NAME_MAX characters. A reference is taken on
 * the matching policy via nrs_policy_get_locked(); callers in this file drop
 * it with nrs_policy_put_locked().
 *
 * NOTE(review): the second parameter line and the return statements are not
 * visible in this excerpt - confirm.
 */
328 static struct ptlrpc_nrs_policy *nrs_policy_find_locked(struct ptlrpc_nrs *nrs,
331 struct ptlrpc_nrs_policy *tmp;
333 list_for_each_entry(tmp, &nrs->nrs_policy_list, pol_list) {
334 if (strncmp(tmp->pol_desc->pd_name, name,
335 NRS_POL_NAME_MAX) == 0) {
336 nrs_policy_get_locked(tmp);
344 * Release references for the resource hierarchy moving upwards towards the
345 * policy instance resource.
/*
 * When the policy provides op_res_put(), calls it on \a res and then on each
 * ancestor reached through res_parent. The parent pointer is read before the
 * hook runs on the current resource.
 */
347 static void nrs_resource_put(struct ptlrpc_nrs_resource *res)
349 struct ptlrpc_nrs_policy *policy = res->res_policy;
351 if (policy->pol_desc->pd_ops->op_res_put != NULL) {
352 struct ptlrpc_nrs_resource *parent;
354 for (; res != NULL; res = parent) {
355 parent = res->res_parent;
356 policy->pol_desc->pd_ops->op_res_put(policy, res);
362 * Obtains references for each resource in the resource hierarchy for request
363 * \a nrq if it is to be handled by \a policy.
365 * \param[in] policy the policy
366 * \param[in] nrq the request
367 * \param[in] moving_req denotes whether this is a call to the function by
368 * ldlm_lock_reorder_req(), in order to move \a nrq to
369 * the high-priority NRS head; we should not sleep when
372 * \retval NULL resource hierarchy references not obtained
373 * \retval valid-pointer the bottom level of the resource hierarchy
375 * \see ptlrpc_nrs_pol_ops::op_res_get()
/*
 * Obtains resources for \a nrq from \a policy through op_res_get(), starting
 * with a NULL parent. Each resource handed back is linked to the previous one
 * through res_parent and tagged with res_policy. One visible path releases the
 * resources collected so far with nrs_resource_put().
 *
 * NOTE(review): the loop construct, the checks on rc and the return statements
 * are not visible in this excerpt - confirm.
 */
378 struct ptlrpc_nrs_resource *nrs_resource_get(struct ptlrpc_nrs_policy *policy,
379 struct ptlrpc_nrs_request *nrq,
383 * Set to NULL to traverse the resource hierarchy from the top.
385 struct ptlrpc_nrs_resource *res = NULL;
386 struct ptlrpc_nrs_resource *tmp = NULL;
390 rc = policy->pol_desc->pd_ops->op_res_get(policy, nrq, res,
394 nrs_resource_put(res);
398 LASSERT(tmp != NULL);
399 tmp->res_parent = res;
400 tmp->res_policy = policy;
404 * Return once we have obtained a reference to the bottom level
405 * of the resource hierarchy.
413 * Obtains resources for the resource hierarchies and policy references for
414 * the fallback and current primary policy (if any), that will later be used
415 * to handle request \a nrq.
417 * \param[in] nrs the NRS head instance that will be handling request \a nrq.
418 * \param[in] nrq the request that is being handled.
419 * \param[out] resp the array where references to the resource hierarchy are
421 * \param[in] moving_req is set when obtaining resources while moving a
422 * request from a policy on the regular NRS head to a
423 * policy on the HP NRS head (via
424 * ldlm_lock_reorder_req()). It signifies that
425 * allocations to get resources should be atomic; for
426 * a full explanation, see comment in
427 * ptlrpc_nrs_pol_ops::op_res_get().
/*
 * Zeroes the NRS_RES_MAX entries of \a resp, takes policy references on the
 * fallback and primary policies under nrs_lock, then obtains the resource
 * hierarchy of each policy outside the lock. The fallback hierarchy is
 * asserted to be non-NULL; if the primary policy yields no resource, the
 * reference on it is dropped again with nrs_policy_put().
 *
 * NOTE(review): the guard around the reference taken on the primary policy is
 * not visible in this excerpt (primary may be NULL, see the later check).
 */
429 static void nrs_resource_get_safe(struct ptlrpc_nrs *nrs,
430 struct ptlrpc_nrs_request *nrq,
431 struct ptlrpc_nrs_resource **resp,
434 struct ptlrpc_nrs_policy *primary = NULL;
435 struct ptlrpc_nrs_policy *fallback = NULL;
437 memset(resp, 0, sizeof(resp[0]) * NRS_RES_MAX);
440 * Obtain policy references.
442 spin_lock(&nrs->nrs_lock);
444 fallback = nrs->nrs_policy_fallback;
445 nrs_policy_get_locked(fallback);
447 primary = nrs->nrs_policy_primary;
449 nrs_policy_get_locked(primary);
451 spin_unlock(&nrs->nrs_lock);
454 * Obtain resource hierarchy references.
456 resp[NRS_RES_FALLBACK] = nrs_resource_get(fallback, nrq, moving_req);
457 LASSERT(resp[NRS_RES_FALLBACK] != NULL);
459 if (primary != NULL) {
460 resp[NRS_RES_PRIMARY] = nrs_resource_get(primary, nrq,
463 * A primary policy may exist which may not wish to serve a
464 * particular request for different reasons; release the
465 * reference on the policy as it will not be used for this
468 if (resp[NRS_RES_PRIMARY] == NULL)
469 nrs_policy_put(primary);
474 * Releases references to resource hierarchies and policies, because they are no
475 * longer required; used when request handling has been completed, or the
476 * request is moving to the high priority NRS head.
478 * \param resp the resource hierarchy that is being released
480 * \see ptlrpc_nrs_req_finalize()
/*
 * Two passes over the NRS_RES_MAX entries of \a resp: the first records the
 * owning policy of each non-NULL entry and releases its resource hierarchy
 * with nrs_resource_put(); the second drops the policy references with
 * nrs_policy_put_locked() under the nrs_lock of the owning NRS head.
 *
 * NOTE(review): the handling of NULL entries (what pols[i] is set to, and how
 * the second pass skips them) is not visible in this excerpt - confirm.
 */
482 static void nrs_resource_put_safe(struct ptlrpc_nrs_resource **resp)
484 struct ptlrpc_nrs_policy *pols[NRS_RES_MAX];
485 struct ptlrpc_nrs *nrs = NULL;
488 for (i = 0; i < NRS_RES_MAX; i++) {
489 if (resp[i] != NULL) {
490 pols[i] = resp[i]->res_policy;
491 nrs_resource_put(resp[i]);
498 for (i = 0; i < NRS_RES_MAX; i++) {
503 nrs = pols[i]->pol_nrs;
504 spin_lock(&nrs->nrs_lock);
506 nrs_policy_put_locked(pols[i]);
510 spin_unlock(&nrs->nrs_lock);
514 * Obtains an NRS request from \a policy for handling or examination; the
515 * request should be removed in the 'handling' case.
517 * Calling into this function implies we already know the policy has a request
518 * waiting to be handled.
520 * \param[in] policy the policy from which a request
521 * \param[in] peek when set, signifies that we just want to examine the
522 * request, and not handle it, so the request is not removed
524 * \param[in] force when set, it will force a policy to return a request if it
527 * \retval the NRS request to be handled
/*
 * Asserts that \a policy has queued requests, asks its op_req_get() hook for
 * one (passing \a peek and \a force through), and asserts that any request
 * handed back belongs to \a policy.
 */
530 struct ptlrpc_nrs_request *nrs_request_get(struct ptlrpc_nrs_policy *policy,
531 bool peek, bool force)
533 struct ptlrpc_nrs_request *nrq;
535 LASSERT(policy->pol_req_queued > 0);
537 nrq = policy->pol_desc->pd_ops->op_req_get(policy, peek, force);
539 LASSERT(ergo(nrq != NULL, nrs_request_policy(nrq) == policy));
545 * Enqueues request \a nrq for later handling, via one of the policies for
546 * which resources were earlier obtained via nrs_resource_get_safe(). The
547 * function attempts to enqueue the request first on the primary policy
548 * (if any), since this is the preferred choice.
550 * \param nrq the request being enqueued
552 * \see nrs_resource_get_safe()
/*
 * Walks nr_res_ptrs of \a nrq from the highest index down, skipping NULL
 * entries, and offers the request to op_req_enqueue() of the policy owning
 * each resource. The queued counters of the policy and of its NRS head are
 * incremented for the policy that takes the request.
 *
 * NOTE(review): the check on rc, the recording of the chosen index and the
 * statement reached when no policy accepts are not visible here - confirm.
 */
554 static inline void nrs_request_enqueue(struct ptlrpc_nrs_request *nrq)
556 struct ptlrpc_nrs_policy *policy;
561 * Try in descending order, because the primary policy (if any) is
562 * the preferred choice.
564 for (i = NRS_RES_MAX - 1; i >= 0; i--) {
565 if (nrq->nr_res_ptrs[i] == NULL)
569 policy = nrq->nr_res_ptrs[i]->res_policy;
571 rc = policy->pol_desc->pd_ops->op_req_enqueue(policy, nrq);
573 policy->pol_nrs->nrs_req_queued++;
574 policy->pol_req_queued++;
579 * Should never get here, as at least the primary policy's
580 * ptlrpc_nrs_pol_ops::op_req_enqueue() implementation should always
587 * Called when a request has been handled
589 * \param[in] nrq the request that has been handled; can be used for
590 * job/resource control.
592 * \see ptlrpc_nrs_req_stop_nolock()
/*
 * Runs the optional op_req_stop() hook of the policy handling \a nrq, asserts
 * that the started counters of the policy and of its NRS head are positive,
 * and decrements both.
 */
594 static inline void nrs_request_stop(struct ptlrpc_nrs_request *nrq)
596 struct ptlrpc_nrs_policy *policy = nrs_request_policy(nrq);
598 if (policy->pol_desc->pd_ops->op_req_stop)
599 policy->pol_desc->pd_ops->op_req_stop(policy, nrq);
601 LASSERT(policy->pol_nrs->nrs_req_started > 0);
602 LASSERT(policy->pol_req_started > 0);
604 policy->pol_nrs->nrs_req_started--;
605 policy->pol_req_started--;
609 * Handler for operations that can be carried out on policies.
611 * Handles opcodes that are common to all policy types within NRS core, and
612 * passes any unknown opcodes to the policy-specific control function.
614 * \param[in] nrs the NRS head this policy belongs to.
615 * \param[in] name the human-readable policy name; should be the same as
616 * ptlrpc_nrs_pol_desc::pd_name.
617 * \param[in] opc the opcode of the operation being carried out.
618 * \param[in,out] arg can be used to pass information in and out between when
619 * carrying an operation; usually data that is private to
620 * the policy at some level, or generic policy status
623 * \retval -ve error condition
624 * \retval 0 operation was carried out successfully
/*
 * Looks up the policy named \a name on \a nrs under nrs_lock and dispatches
 * on \a opc: PTLRPC_NRS_CTL_START goes to nrs_policy_start_locked(), other
 * opcodes go to nrs_policy_ctl_locked(). The reference taken by the lookup is
 * dropped with nrs_policy_put_locked() before the lock is released.
 *
 * NOTE(review): the switch statement, the not-found branch and the return
 * statement are not visible in this excerpt - confirm.
 */
626 static int nrs_policy_ctl(struct ptlrpc_nrs *nrs, char *name,
627 enum ptlrpc_nrs_ctl opc, void *arg)
629 struct ptlrpc_nrs_policy *policy;
632 spin_lock(&nrs->nrs_lock);
634 policy = nrs_policy_find_locked(nrs, name);
635 if (policy == NULL) {
642 * Unknown opcode, pass it down to the policy-specific control
643 * function for handling.
646 rc = nrs_policy_ctl_locked(policy, opc, arg);
652 case PTLRPC_NRS_CTL_START:
653 rc = nrs_policy_start_locked(policy);
658 nrs_policy_put_locked(policy);
660 spin_unlock(&nrs->nrs_lock);
666 * Unregisters a policy by name.
668 * \param[in] nrs the NRS head this policy belongs to.
669 * \param[in] name the human-readable policy name; should be the same as
670 * ptlrpc_nrs_pol_desc::pd_name
/*
 * Removes the policy named \a name from \a nrs.
 *
 * Under nrs_lock: looks the policy up (logging an error if it is absent),
 * refuses while references other than the one from the lookup exist, asserts
 * that no requests are queued or started, stops the policy if it is not
 * already STOPPED, unlinks it from the policy list and drops the lookup
 * reference. After unlocking, nrs_policy_fini() is called and pol_private is
 * asserted to be NULL.
 *
 * NOTE(review): the return values and the release of the policy memory are
 * not visible in this excerpt - confirm.
 */
675 static int nrs_policy_unregister(struct ptlrpc_nrs *nrs, char *name)
677 struct ptlrpc_nrs_policy *policy = NULL;
679 spin_lock(&nrs->nrs_lock);
681 policy = nrs_policy_find_locked(nrs, name);
682 if (policy == NULL) {
683 spin_unlock(&nrs->nrs_lock);
685 CERROR("Can't find NRS policy %s\n", name);
689 if (policy->pol_ref > 1) {
690 CERROR("Policy %s is busy with %d references\n", name,
691 (int)policy->pol_ref);
692 nrs_policy_put_locked(policy);
694 spin_unlock(&nrs->nrs_lock);
698 LASSERT(policy->pol_req_queued == 0);
699 LASSERT(policy->pol_req_started == 0);
701 if (policy->pol_state != NRS_POL_STATE_STOPPED) {
702 nrs_policy_stop_locked(policy);
703 LASSERT(policy->pol_state == NRS_POL_STATE_STOPPED);
706 list_del(&policy->pol_list);
709 nrs_policy_put_locked(policy);
711 spin_unlock(&nrs->nrs_lock);
713 nrs_policy_fini(policy);
715 LASSERT(policy->pol_private == NULL);
722 * Register a policy from policy descriptor \a desc with NRS head \a nrs.
724 * \param[in] nrs the NRS head on which the policy will be registered.
725 * \param[in] desc the policy descriptor from which the information will be
726 * obtained to register the policy.
/*
 * Creates a policy instance for descriptor \a desc on NRS head \a nrs.
 *
 * Asserts that the descriptor supplies the mandatory hooks (op_res_get,
 * op_req_get, op_req_enqueue, op_req_dequeue) and pd_compat, allocates a
 * zeroed policy, fills it in as STOPPED with the descriptor flags, and runs
 * nrs_policy_init(). Under nrs_lock a policy of the same name is looked up;
 * the duplicate path logs an error and finalizes the new instance. Otherwise
 * the policy is appended to nrs_policy_list and started right away when
 * PTLRPC_NRS_FL_REG_START is set. One visible path unregisters the policy
 * again (presumably when that start fails).
 *
 * NOTE(review): allocation-failure handling, the checks on rc and tmp, the
 * freeing of the policy and the return statements are not visible in this
 * excerpt - confirm.
 */
731 static int nrs_policy_register(struct ptlrpc_nrs *nrs,
732 struct ptlrpc_nrs_pol_desc *desc)
734 struct ptlrpc_nrs_policy *policy;
735 struct ptlrpc_nrs_policy *tmp;
736 struct ptlrpc_service_part *svcpt = nrs->nrs_svcpt;
739 LASSERT(svcpt != NULL);
740 LASSERT(desc->pd_ops != NULL);
741 LASSERT(desc->pd_ops->op_res_get != NULL);
742 LASSERT(desc->pd_ops->op_req_get != NULL);
743 LASSERT(desc->pd_ops->op_req_enqueue != NULL);
744 LASSERT(desc->pd_ops->op_req_dequeue != NULL);
745 LASSERT(desc->pd_compat != NULL);
747 policy = kzalloc_node(sizeof(*policy), GFP_NOFS,
748 cfs_cpt_spread_node(svcpt->scp_service->srv_cptable,
753 policy->pol_nrs = nrs;
754 policy->pol_desc = desc;
755 policy->pol_state = NRS_POL_STATE_STOPPED;
756 policy->pol_flags = desc->pd_flags;
758 INIT_LIST_HEAD(&policy->pol_list);
759 INIT_LIST_HEAD(&policy->pol_list_queued);
761 rc = nrs_policy_init(policy);
767 spin_lock(&nrs->nrs_lock);
769 tmp = nrs_policy_find_locked(nrs, policy->pol_desc->pd_name);
771 CERROR("NRS policy %s has been registered, can't register it for %s\n",
772 policy->pol_desc->pd_name,
773 svcpt->scp_service->srv_name);
774 nrs_policy_put_locked(tmp);
776 spin_unlock(&nrs->nrs_lock);
777 nrs_policy_fini(policy);
783 list_add_tail(&policy->pol_list, &nrs->nrs_policy_list);
786 if (policy->pol_flags & PTLRPC_NRS_FL_REG_START)
787 rc = nrs_policy_start_locked(policy);
789 spin_unlock(&nrs->nrs_lock);
792 (void) nrs_policy_unregister(nrs, policy->pol_desc->pd_name);
798 * Enqueue request \a req using one of the policies its resources are referring
801 * \param[in] req the request to enqueue.
/*
 * Enqueues the NRS request embedded in \a req: asserts it is initialized and
 * not yet enqueued, calls nrs_request_enqueue(), sets nr_enqueued, and links
 * the handling policy into nrs_policy_queued of its NRS head if the policy is
 * not on that list yet.
 */
803 static void ptlrpc_nrs_req_add_nolock(struct ptlrpc_request *req)
805 struct ptlrpc_nrs_policy *policy;
807 LASSERT(req->rq_nrq.nr_initialized);
808 LASSERT(!req->rq_nrq.nr_enqueued);
810 nrs_request_enqueue(&req->rq_nrq);
811 req->rq_nrq.nr_enqueued = 1;
813 policy = nrs_request_policy(&req->rq_nrq);
815 * Add the policy to the NRS head's list of policies with enqueued
816 * requests, if it has not been added there.
818 if (unlikely(list_empty(&policy->pol_list_queued)))
819 list_add_tail(&policy->pol_list_queued,
820 &policy->pol_nrs->nrs_policy_queued);
824 * Enqueue a request on the high priority NRS head.
826 * \param req the request to enqueue.
/*
 * Reads the opcode of the request message, then under req->rq_lock enqueues
 * \a req through ptlrpc_nrs_req_add_nolock() and emits a D_NET debug message.
 *
 * NOTE(review): any statements that use opc or mark the request as high
 * priority are not visible in this excerpt - confirm.
 */
828 static void ptlrpc_nrs_hpreq_add_nolock(struct ptlrpc_request *req)
830 int opc = lustre_msg_get_opc(req->rq_reqmsg);
832 spin_lock(&req->rq_lock);
834 ptlrpc_nrs_req_add_nolock(req);
836 DEBUG_REQ(D_NET, req, "high priority req");
837 spin_unlock(&req->rq_lock);
841 * Returns a boolean predicate indicating whether the policy described by
842 * \a desc is adequate for use with service \a svc.
844 * \param[in] svc the service
845 * \param[in] desc the policy descriptor
847 * \retval false the policy is not compatible with the service
848 * \retval true the policy is compatible with the service
/* Thin wrapper: delegates the compatibility decision to the pd_compat() callback of \a desc. */
850 static inline bool nrs_policy_compatible(const struct ptlrpc_service *svc,
851 const struct ptlrpc_nrs_pol_desc *desc)
853 return desc->pd_compat(svc, desc);
857 * Registers all compatible policies in nrs_core.nrs_policies, for NRS head
860 * \param[in] nrs the NRS head
865 * \pre mutex_is_locked(&nrs_core.nrs_mutex)
867 * \see ptlrpc_service_nrs_setup()
/*
 * With nrs_core.nrs_mutex held (asserted), walks nrs_core.nrs_policies and
 * registers every descriptor compatible with the service of \a nrs through
 * nrs_policy_register(), logging an error on the failure path.
 *
 * NOTE(review): the check on rc, the loop exit and the return statement are
 * not visible in this excerpt - confirm.
 */
869 static int nrs_register_policies_locked(struct ptlrpc_nrs *nrs)
871 struct ptlrpc_nrs_pol_desc *desc;
872 /* for convenience */
873 struct ptlrpc_service_part *svcpt = nrs->nrs_svcpt;
874 struct ptlrpc_service *svc = svcpt->scp_service;
877 LASSERT(mutex_is_locked(&nrs_core.nrs_mutex));
879 list_for_each_entry(desc, &nrs_core.nrs_policies, pd_list) {
880 if (nrs_policy_compatible(svc, desc)) {
881 rc = nrs_policy_register(nrs, desc);
883 CERROR("Failed to register NRS policy %s for partition %d of service %s: %d\n",
884 desc->pd_name, svcpt->scp_cpt,
887 * Fail registration if any of the policies'
888 * registration fails.
899 * Initializes NRS head \a nrs of service partition \a svcpt, and registers all
900 * compatible policies in NRS core, with the NRS head.
902 * \param[in] nrs the NRS head
903 * \param[in] svcpt the PTLRPC service partition to setup
908 * \pre mutex_is_locked(&nrs_core.nrs_mutex)
/*
 * Initializes one NRS head: derives the queue type by comparing \a nrs with
 * the regular head embedded in \a svcpt and with the scp_nrs_hp pointer, then
 * sets the back-pointer, queue type, lock and both policy lists, and finally
 * registers all compatible policies via nrs_register_policies_locked().
 *
 * NOTE(review): the branch taken when \a nrs matches neither head is not
 * visible in this excerpt - confirm.
 */
910 static int nrs_svcpt_setup_locked0(struct ptlrpc_nrs *nrs,
911 struct ptlrpc_service_part *svcpt)
913 enum ptlrpc_nrs_queue_type queue;
915 LASSERT(mutex_is_locked(&nrs_core.nrs_mutex));
917 if (nrs == &svcpt->scp_nrs_reg)
918 queue = PTLRPC_NRS_QUEUE_REG;
919 else if (nrs == svcpt->scp_nrs_hp)
920 queue = PTLRPC_NRS_QUEUE_HP;
924 nrs->nrs_svcpt = svcpt;
925 nrs->nrs_queue_type = queue;
926 spin_lock_init(&nrs->nrs_lock);
927 INIT_LIST_HEAD(&nrs->nrs_policy_list);
928 INIT_LIST_HEAD(&nrs->nrs_policy_queued);
930 return nrs_register_policies_locked(nrs);
934 * Allocates a regular and optionally a high-priority NRS head (if the service
935 * handles high-priority RPCs), and then registers all available compatible
936 * policies on those NRS heads.
938 * \param[in,out] svcpt the PTLRPC service partition to setup
940 * \pre mutex_is_locked(&nrs_core.nrs_mutex)
/*
 * Sets up the regular NRS head of \a svcpt and, unless the service has no
 * so_hpreq_handler, allocates a zeroed high-priority head and sets that up
 * as well, both through nrs_svcpt_setup_locked0().
 *
 * NOTE(review): the checks on rc, the assignment target of the allocation
 * and the return statements are not visible in this excerpt - confirm.
 */
942 static int nrs_svcpt_setup_locked(struct ptlrpc_service_part *svcpt)
944 struct ptlrpc_nrs *nrs;
947 LASSERT(mutex_is_locked(&nrs_core.nrs_mutex));
950 * Initialize the regular NRS head.
952 nrs = nrs_svcpt2nrs(svcpt, false);
953 rc = nrs_svcpt_setup_locked0(nrs, svcpt);
958 * Optionally allocate a high-priority NRS head.
960 if (svcpt->scp_service->srv_ops.so_hpreq_handler == NULL)
964 kzalloc_node(sizeof(*svcpt->scp_nrs_hp), GFP_NOFS,
965 cfs_cpt_spread_node(svcpt->scp_service->srv_cptable,
967 if (svcpt->scp_nrs_hp == NULL) {
972 nrs = nrs_svcpt2nrs(svcpt, true);
973 rc = nrs_svcpt_setup_locked0(nrs, svcpt);
980 * Unregisters all policies on all available NRS heads in a service partition;
981 * called at PTLRPC service unregistration time.
983 * \param[in] svcpt the PTLRPC service partition
985 * \pre mutex_is_locked(&nrs_core.nrs_mutex)
/*
 * Marks the selected NRS head of \a svcpt as stopping and unregisters every
 * policy on its policy list by name, using the removal-safe list iterator.
 * A further visible check covers the high-priority head when the partition
 * has one and the regular head was just processed.
 *
 * NOTE(review): the declaration of hp, the handling of rc and the statements
 * inside the final branch are not visible in this excerpt - confirm.
 */
987 static void nrs_svcpt_cleanup_locked(struct ptlrpc_service_part *svcpt)
989 struct ptlrpc_nrs *nrs;
990 struct ptlrpc_nrs_policy *policy;
991 struct ptlrpc_nrs_policy *tmp;
995 LASSERT(mutex_is_locked(&nrs_core.nrs_mutex));
998 nrs = nrs_svcpt2nrs(svcpt, hp);
999 nrs->nrs_stopping = 1;
1001 list_for_each_entry_safe(policy, tmp, &nrs->nrs_policy_list,
1003 rc = nrs_policy_unregister(nrs, policy->pol_desc->pd_name);
1008 * If the service partition has an HP NRS head, clean that up as well.
1010 if (!hp && nrs_svcpt_has_hp(svcpt)) {
1020 * Returns the descriptor for a policy as identified by \a name.
1022 * \param[in] name the policy name
1024 * \retval the policy descriptor
/*
 * Walks nrs_core.nrs_policies comparing each pd_name with \a name over at
 * most NRS_POL_NAME_MAX characters.
 *
 * NOTE(review): the return statements are not visible in this excerpt; the
 * caller in this file treats a non-NULL result as already registered.
 */
1027 static struct ptlrpc_nrs_pol_desc *nrs_policy_find_desc_locked(const char *name)
1029 struct ptlrpc_nrs_pol_desc *tmp;
1031 list_for_each_entry(tmp, &nrs_core.nrs_policies, pd_list) {
1032 if (strncmp(tmp->pd_name, name, NRS_POL_NAME_MAX) == 0)
1039 * Removes the policy from all supported NRS heads of all partitions of all
1042 * \param[in] desc the policy descriptor to unregister
1045 * \retval 0 successfully unregistered policy on all supported NRS heads
1047 * \pre mutex_is_locked(&nrs_core.nrs_mutex)
1048 * \pre mutex_is_locked(&ptlrpc_all_services_mutex)
/*
 * With nrs_core.nrs_mutex and ptlrpc_all_services_mutex held (both asserted),
 * walks ptlrpc_all_services and checks each service for compatibility with
 * \a desc and for srv_is_stopping. On every partition of the services that
 * are processed it unregisters the policy by name from the selected NRS
 * head. A -ENOENT result is special-cased, other failures are logged. The
 * optional op_lprocfs_fini() hook is then run for the service.
 *
 * NOTE(review): the declaration of hp, the loop-control statements and the
 * return statements are not visible in this excerpt - confirm.
 */
1050 static int nrs_policy_unregister_locked(struct ptlrpc_nrs_pol_desc *desc)
1052 struct ptlrpc_nrs *nrs;
1053 struct ptlrpc_service *svc;
1054 struct ptlrpc_service_part *svcpt;
1058 LASSERT(mutex_is_locked(&nrs_core.nrs_mutex));
1059 LASSERT(mutex_is_locked(&ptlrpc_all_services_mutex));
1061 list_for_each_entry(svc, &ptlrpc_all_services, srv_list) {
1063 if (!nrs_policy_compatible(svc, desc) ||
1064 unlikely(svc->srv_is_stopping))
1067 ptlrpc_service_for_each_part(svcpt, i, svc) {
1071 nrs = nrs_svcpt2nrs(svcpt, hp);
1072 rc = nrs_policy_unregister(nrs, desc->pd_name);
1074 * Ignore -ENOENT as the policy may not have registered
1075 * successfully on all service partitions.
1077 if (rc == -ENOENT) {
1079 } else if (rc != 0) {
1080 CERROR("Failed to unregister NRS policy %s for partition %d of service %s: %d\n",
1081 desc->pd_name, svcpt->scp_cpt,
1082 svcpt->scp_service->srv_name, rc);
1086 if (!hp && nrs_svc_has_hp(svc)) {
1092 if (desc->pd_ops->op_lprocfs_fini != NULL)
1093 desc->pd_ops->op_lprocfs_fini(svc);
1100 * Registers a new policy with NRS core.
1102 * The function will only succeed if policy registration with all compatible
1103 * service partitions (if any) is successful.
1105 * N.B. This function should be called either at ptlrpc module initialization
1106 * time when registering a policy that ships with NRS core, or in a
1107 * module's init() function for policies registering from other modules.
1109 * \param[in] conf configuration information for the new policy to register
/*
 * Registers the policy described by \a conf with NRS core.
 *
 * Visible steps, in order:
 *  - assert the mandatory configuration fields and force NUL termination of
 *    conf->nc_name within NRS_POL_NAME_MAX bytes;
 *  - check for external policies (PTLRPC_NRS_FL_REG_EXTERN) that also ask to
 *    be the fallback policy or to start on registration, logging an error;
 *  - under nrs_core.nrs_mutex, check for a descriptor with the same name,
 *    logging an error if one exists;
 *  - allocate a zeroed descriptor and copy name, ops, compat callback, owner
 *    (external policies only) and flags into it, with pd_refs set to 0;
 *  - check PTLRPC_NRS_FL_REG_EXTERN again (per the existing comment, policies
 *    built into ptlrpc are not registered on services here);
 *  - under ptlrpc_all_services_mutex, register the policy on the selected NRS
 *    head of every partition of the services that are processed, and run the
 *    optional op_lprocfs_init() hook per service; the failure paths call
 *    nrs_policy_unregister_locked() and release that mutex;
 *  - add the descriptor to nrs_core.nrs_policies and release nrs_mutex.
 *
 * NOTE(review): error codes, goto targets, the freeing of the descriptor on
 * failure and the final return are not visible in this excerpt - confirm.
 */
1114 static int ptlrpc_nrs_policy_register(struct ptlrpc_nrs_pol_conf *conf)
1116 struct ptlrpc_service *svc;
1117 struct ptlrpc_nrs_pol_desc *desc;
1120 LASSERT(conf != NULL);
1121 LASSERT(conf->nc_ops != NULL);
1122 LASSERT(conf->nc_compat != NULL);
1123 LASSERT(ergo(conf->nc_compat == nrs_policy_compat_one,
1124 conf->nc_compat_svc_name != NULL));
1125 LASSERT(ergo((conf->nc_flags & PTLRPC_NRS_FL_REG_EXTERN) != 0,
1126 conf->nc_owner != NULL));
1128 conf->nc_name[NRS_POL_NAME_MAX - 1] = '\0';
1131 * External policies are not allowed to start immediately upon
1132 * registration, as there is a relatively higher chance that their
1133 * registration might fail. In such a case, some policy instances may
1134 * already have requests queued wen unregistration needs to happen as
1135 * part o cleanup; since there is currently no way to drain requests
1136 * from a policy unless the service is unregistering, we just disallow
1139 if ((conf->nc_flags & PTLRPC_NRS_FL_REG_EXTERN) &&
1140 (conf->nc_flags & (PTLRPC_NRS_FL_FALLBACK |
1141 PTLRPC_NRS_FL_REG_START))) {
1142 CERROR("NRS: failing to register policy %s. Please check policy flags; external policies cannot act as fallback policies, or be started immediately upon registration without interaction with lprocfs\n",
1147 mutex_lock(&nrs_core.nrs_mutex);
1149 if (nrs_policy_find_desc_locked(conf->nc_name) != NULL) {
1150 CERROR("NRS: failing to register policy %s which has already been registered with NRS core!\n",
1156 desc = kzalloc(sizeof(*desc), GFP_NOFS);
1162 strncpy(desc->pd_name, conf->nc_name, NRS_POL_NAME_MAX);
1163 desc->pd_ops = conf->nc_ops;
1164 desc->pd_compat = conf->nc_compat;
1165 desc->pd_compat_svc_name = conf->nc_compat_svc_name;
1166 if ((conf->nc_flags & PTLRPC_NRS_FL_REG_EXTERN) != 0)
1167 desc->pd_owner = conf->nc_owner;
1168 desc->pd_flags = conf->nc_flags;
1169 atomic_set(&desc->pd_refs, 0);
1172 * For policies that are held in the same module as NRS (currently
1173 * ptlrpc), do not register the policy with all compatible services,
1174 * as the services will not have started at this point, since we are
1175 * calling from ptlrpc module initialization code. In such cases each
1176 * service will register all compatible policies later, via
1177 * ptlrpc_service_nrs_setup().
1179 if ((conf->nc_flags & PTLRPC_NRS_FL_REG_EXTERN) == 0)
1183 * Register the new policy on all compatible services
1185 mutex_lock(&ptlrpc_all_services_mutex);
1187 list_for_each_entry(svc, &ptlrpc_all_services, srv_list) {
1188 struct ptlrpc_service_part *svcpt;
1192 if (!nrs_policy_compatible(svc, desc) ||
1193 unlikely(svc->srv_is_stopping))
1196 ptlrpc_service_for_each_part(svcpt, i, svc) {
1197 struct ptlrpc_nrs *nrs;
1200 nrs = nrs_svcpt2nrs(svcpt, hp);
1201 rc = nrs_policy_register(nrs, desc);
1203 CERROR("Failed to register NRS policy %s for partition %d of service %s: %d\n",
1204 desc->pd_name, svcpt->scp_cpt,
1205 svcpt->scp_service->srv_name, rc);
1207 rc2 = nrs_policy_unregister_locked(desc);
1209 * Should not fail at this point
1212 mutex_unlock(&ptlrpc_all_services_mutex);
1217 if (!hp && nrs_svc_has_hp(svc)) {
1224 * No need to take a reference to other modules here, as we
1225 * will be calling from the module's init() function.
1227 if (desc->pd_ops->op_lprocfs_init != NULL) {
1228 rc = desc->pd_ops->op_lprocfs_init(svc);
1230 rc2 = nrs_policy_unregister_locked(desc);
1232 * Should not fail at this point
1235 mutex_unlock(&ptlrpc_all_services_mutex);
1242 mutex_unlock(&ptlrpc_all_services_mutex);
1244 list_add_tail(&desc->pd_list, &nrs_core.nrs_policies);
1246 mutex_unlock(&nrs_core.nrs_mutex);
1252 * Setup NRS heads on all service partitions of service \a svc, and register
1253 * all compatible policies on those NRS heads.
1255 * To be called from within ptlrpc_register_service().
1256 * \param[in] svc the service to setup
1258 * \retval -ve error, the calling logic should eventually call
1259 * ptlrpc_service_nrs_cleanup() to undo any work performed
1262 * \see ptlrpc_register_service()
1263 * \see ptlrpc_service_nrs_cleanup()
/*
 * Under nrs_core.nrs_mutex: sets up the NRS heads of every partition of
 * \a svc through nrs_svcpt_setup_locked(), then runs the optional
 * op_lprocfs_init() hook of every registered descriptor that is compatible
 * with the service.
 *
 * NOTE(review): the checks on rc, the failure exits and the return statement
 * are not visible in this excerpt - confirm.
 */
1265 int ptlrpc_service_nrs_setup(struct ptlrpc_service *svc)
1267 struct ptlrpc_service_part *svcpt;
1268 const struct ptlrpc_nrs_pol_desc *desc;
1272 mutex_lock(&nrs_core.nrs_mutex);
1275 * Initialize NRS heads on all service CPTs.
1277 ptlrpc_service_for_each_part(svcpt, i, svc) {
1278 rc = nrs_svcpt_setup_locked(svcpt);
1284 * Set up lprocfs interfaces for all supported policies for the
1287 list_for_each_entry(desc, &nrs_core.nrs_policies, pd_list) {
1288 if (!nrs_policy_compatible(svc, desc))
1291 if (desc->pd_ops->op_lprocfs_init != NULL) {
1292 rc = desc->pd_ops->op_lprocfs_init(svc);
1300 mutex_unlock(&nrs_core.nrs_mutex);
1306 * Unregisters all policies on all service partitions of service \a svc.
1308 * \param[in] svc the PTLRPC service to unregister
/*
 * Under nrs_core.nrs_mutex: cleans up the NRS heads of every partition of
 * \a svc through nrs_svcpt_cleanup_locked(), then runs the optional
 * op_lprocfs_fini() hook of every registered descriptor that is compatible
 * with the service.
 */
1310 void ptlrpc_service_nrs_cleanup(struct ptlrpc_service *svc)
1312 struct ptlrpc_service_part *svcpt;
1313 const struct ptlrpc_nrs_pol_desc *desc;
1316 mutex_lock(&nrs_core.nrs_mutex);
1319 * Clean up NRS heads on all service partitions
1321 ptlrpc_service_for_each_part(svcpt, i, svc)
1322 nrs_svcpt_cleanup_locked(svcpt);
1325 * Clean up lprocfs interfaces for all supported policies for the
1328 list_for_each_entry(desc, &nrs_core.nrs_policies, pd_list) {
1329 if (!nrs_policy_compatible(svc, desc))
1332 if (desc->pd_ops->op_lprocfs_fini != NULL)
1333 desc->pd_ops->op_lprocfs_fini(svc);
1336 mutex_unlock(&nrs_core.nrs_mutex);
1340 * Obtains NRS head resources for request \a req.
1342 * These could be either on the regular or HP NRS head of \a svcpt; resources
1343 * taken on the regular head can later be swapped for HP head resources by
1344 * ldlm_lock_reorder_req().
1346 * \param[in] svcpt the service partition
1347 * \param[in] req the request
1348 * \param[in] hp which NRS head of \a svcpt to use
1350 void ptlrpc_nrs_req_initialize(struct ptlrpc_service_part *svcpt,
1351 struct ptlrpc_request *req, bool hp)
1353 struct ptlrpc_nrs *nrs = nrs_svcpt2nrs(svcpt, hp);
1355 memset(&req->rq_nrq, 0, sizeof(req->rq_nrq));
1356 nrs_resource_get_safe(nrs, &req->rq_nrq, req->rq_nrq.nr_res_ptrs,
1360 * It is fine to access \e nr_initialized without locking as there is
1361 * no contention at this early stage.
1363 req->rq_nrq.nr_initialized = 1;
1367 * Releases resources for a request; is called after the request has been
1370 * \param[in] req the request
1372 * \see ptlrpc_server_finish_request()
1374 void ptlrpc_nrs_req_finalize(struct ptlrpc_request *req)
1376 if (req->rq_nrq.nr_initialized) {
1377 nrs_resource_put_safe(req->rq_nrq.nr_res_ptrs);
1378 /* no protection on bit nr_initialized because no
1379 * contention at this late stage */
1380 req->rq_nrq.nr_finalized = 1;
1384 void ptlrpc_nrs_req_stop_nolock(struct ptlrpc_request *req)
1386 if (req->rq_nrq.nr_started)
1387 nrs_request_stop(&req->rq_nrq);
1391 * Enqueues request \a req on either the regular or high-priority NRS head
1392 * of service partition \a svcpt.
1394 * \param[in] svcpt the service partition
1395 * \param[in] req the request to be enqueued
1396 * \param[in] hp whether to enqueue the request on the regular or
1397 * high-priority NRS head.
1399 void ptlrpc_nrs_req_add(struct ptlrpc_service_part *svcpt,
1400 struct ptlrpc_request *req, bool hp)
1402 spin_lock(&svcpt->scp_req_lock);
1405 ptlrpc_nrs_hpreq_add_nolock(req);
1407 ptlrpc_nrs_req_add_nolock(req);
1409 spin_unlock(&svcpt->scp_req_lock);
1412 static void nrs_request_removed(struct ptlrpc_nrs_policy *policy)
1414 LASSERT(policy->pol_nrs->nrs_req_queued > 0);
1415 LASSERT(policy->pol_req_queued > 0);
1417 policy->pol_nrs->nrs_req_queued--;
1418 policy->pol_req_queued--;
1421 * If the policy has no more requests queued, remove it from
1422 * ptlrpc_nrs::nrs_policy_queued.
1424 if (unlikely(policy->pol_req_queued == 0)) {
1425 list_del_init(&policy->pol_list_queued);
1428 * If there are other policies with queued requests, move the
1429 * current policy to the end so that we can round robin over
1430 * all policies and drain the requests.
1432 } else if (policy->pol_req_queued != policy->pol_nrs->nrs_req_queued) {
1433 LASSERT(policy->pol_req_queued <
1434 policy->pol_nrs->nrs_req_queued);
1436 list_move_tail(&policy->pol_list_queued,
1437 &policy->pol_nrs->nrs_policy_queued);
1442 * Obtains a request for handling from an NRS head of service partition
1445 * \param[in] svcpt the service partition
1446 * \param[in] hp whether to obtain a request from the regular or
1447 * high-priority NRS head.
1448 * \param[in] peek when set, signifies that we just want to examine the
1449 * request, and not handle it, so the request is not removed
1451 * \param[in] force when set, it will force a policy to return a request if it
1454 * \retval the request to be handled
1455 * \retval NULL the head has no requests to serve
1457 struct ptlrpc_request *
1458 ptlrpc_nrs_req_get_nolock0(struct ptlrpc_service_part *svcpt, bool hp,
1459 bool peek, bool force)
1461 struct ptlrpc_nrs *nrs = nrs_svcpt2nrs(svcpt, hp);
1462 struct ptlrpc_nrs_policy *policy;
1463 struct ptlrpc_nrs_request *nrq;
1466 * Always try to drain requests from all NRS polices even if they are
1467 * inactive, because the user can change policy status at runtime.
1469 list_for_each_entry(policy, &nrs->nrs_policy_queued,
1471 nrq = nrs_request_get(policy, peek, force);
1473 if (likely(!peek)) {
1474 nrq->nr_started = 1;
1476 policy->pol_req_started++;
1477 policy->pol_nrs->nrs_req_started++;
1479 nrs_request_removed(policy);
1482 return container_of(nrq, struct ptlrpc_request, rq_nrq);
1490 * Returns whether there are any requests currently enqueued on any of the
1491 * policies of service partition's \a svcpt NRS head specified by \a hp. Should
1492 * be called while holding ptlrpc_service_part::scp_req_lock to get a reliable
1495 * \param[in] svcpt the service partition to enquire.
1496 * \param[in] hp whether the regular or high-priority NRS head is to be
1499 * \retval false the indicated NRS head has no enqueued requests.
1500 * \retval true the indicated NRS head has some enqueued requests.
1502 bool ptlrpc_nrs_req_pending_nolock(struct ptlrpc_service_part *svcpt, bool hp)
1504 struct ptlrpc_nrs *nrs = nrs_svcpt2nrs(svcpt, hp);
1506 return nrs->nrs_req_queued > 0;
1510 * Carries out a control operation \a opc on the policy identified by the
1511 * human-readable \a name, on either all partitions, or only on the first
1512 * partition of service \a svc.
1514 * \param[in] svc the service the policy belongs to.
1515 * \param[in] queue whether to carry out the command on the policy which
1516 * belongs to the regular, high-priority, or both NRS
1517 * heads of service partitions of \a svc.
1518 * \param[in] name the policy to act upon, by human-readable name
1519 * \param[in] opc the opcode of the operation to carry out
1520 * \param[in] single when set, the operation will only be carried out on the
1521 * NRS heads of the first service partition of \a svc.
1522 * This is useful for some policies which e.g. share
1523 * identical values on the same parameters of different
1524 * service partitions; when reading these parameters via
1525 * lprocfs, these policies may just want to obtain and
1526 * print out the values from the first service partition.
1527 * Storing these values centrally elsewhere then could be
1528 * another solution for this.
1529 * \param[in,out] arg can be used as a generic in/out buffer between control
1530 * operations and the user environment.
1532 *\retval -ve error condition
1533 *\retval 0 operation was carried out successfully
1535 int ptlrpc_nrs_policy_control(const struct ptlrpc_service *svc,
1536 enum ptlrpc_nrs_queue_type queue, char *name,
1537 enum ptlrpc_nrs_ctl opc, bool single, void *arg)
1539 struct ptlrpc_service_part *svcpt;
1543 LASSERT(opc != PTLRPC_NRS_CTL_INVALID);
1545 if ((queue & PTLRPC_NRS_QUEUE_BOTH) == 0)
1548 ptlrpc_service_for_each_part(svcpt, i, svc) {
1549 if ((queue & PTLRPC_NRS_QUEUE_REG) != 0) {
1550 rc = nrs_policy_ctl(nrs_svcpt2nrs(svcpt, false), name,
1552 if (rc != 0 || (queue == PTLRPC_NRS_QUEUE_REG &&
1557 if ((queue & PTLRPC_NRS_QUEUE_HP) != 0) {
1559 * XXX: We could optionally check for
1560 * nrs_svc_has_hp(svc) here, and return an error if it
1561 * is false. Right now we rely on the policies' lprocfs
1562 * handlers that call the present function to make this
1563 * check; if they fail to do so, they might hit the
1564 * assertion inside nrs_svcpt2nrs() below.
1566 rc = nrs_policy_ctl(nrs_svcpt2nrs(svcpt, true), name,
1568 if (rc != 0 || single)
1576 /* ptlrpc/nrs_fifo.c */
1577 extern struct ptlrpc_nrs_pol_conf nrs_conf_fifo;
1580 * Adds all policies that ship with the ptlrpc module, to NRS core's list of
1581 * policies \e nrs_core.nrs_policies.
1583 * \retval 0 all policies have been registered successfully
1586 int ptlrpc_nrs_init(void)
1590 mutex_init(&nrs_core.nrs_mutex);
1591 INIT_LIST_HEAD(&nrs_core.nrs_policies);
1593 rc = ptlrpc_nrs_policy_register(&nrs_conf_fifo);
1600 * Since no PTLRPC services have been started at this point, all we need
1601 * to do for cleanup is to free the descriptors.
1609 * Removes all policy descriptors from nrs_core::nrs_policies, and frees the
1610 * policy descriptors.
1612 * Since all PTLRPC services are stopped at this point, there are no more
1613 * instances of any policies, because each service will have stopped its policy
1614 * instances in ptlrpc_service_nrs_cleanup(), so we just need to free the
1617 void ptlrpc_nrs_fini(void)
1619 struct ptlrpc_nrs_pol_desc *desc;
1620 struct ptlrpc_nrs_pol_desc *tmp;
1622 list_for_each_entry_safe(desc, tmp, &nrs_core.nrs_policies,
1624 list_del_init(&desc->pd_list);