X-Git-Url: https://gerrit.opnfv.org/gerrit/gitweb?a=blobdiff_plain;f=qemu%2Fblock%2Fthrottle-groups.c;fp=qemu%2Fblock%2Fthrottle-groups.c;h=1abc6fcaea73690aeb5f175dfcc7122471c4061d;hb=e44e3482bdb4d0ebde2d8b41830ac2cdb07948fb;hp=0000000000000000000000000000000000000000;hpb=9ca8dbcc65cfc63d6f5ef3312a33184e1d726e00;p=kvmfornfv.git diff --git a/qemu/block/throttle-groups.c b/qemu/block/throttle-groups.c new file mode 100644 index 000000000..1abc6fcae --- /dev/null +++ b/qemu/block/throttle-groups.c @@ -0,0 +1,501 @@ +/* + * QEMU block throttling group infrastructure + * + * Copyright (C) Nodalink, EURL. 2014 + * Copyright (C) Igalia, S.L. 2015 + * + * Authors: + * Benoît Canet + * Alberto Garcia + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 or + * (at your option) version 3 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . + */ + +#include "block/throttle-groups.h" +#include "qemu/queue.h" +#include "qemu/thread.h" +#include "sysemu/qtest.h" + +/* The ThrottleGroup structure (with its ThrottleState) is shared + * among different BlockDriverState and it's independent from + * AioContext, so in order to use it from different threads it needs + * its own locking. + * + * This locking is however handled internally in this file, so it's + * mostly transparent to outside users (but see the documentation in + * throttle_groups_lock()). + * + * The whole ThrottleGroup structure is private and invisible to + * outside users, that only use it through its ThrottleState. + * + * In addition to the ThrottleGroup structure, BlockDriverState has + * fields that need to be accessed by other members of the group and + * therefore also need to be protected by this lock. Once a BDS is + * registered in a group those fields can be accessed by other threads + * any time. + * + * Again, all this is handled internally and is mostly transparent to + * the outside. The 'throttle_timers' field however has an additional + * constraint because it may be temporarily invalid (see for example + * bdrv_set_aio_context()). Therefore in this file a thread will + * access some other BDS's timers only after verifying that that BDS + * has throttled requests in the queue. + */ +typedef struct ThrottleGroup { + char *name; /* This is constant during the lifetime of the group */ + + QemuMutex lock; /* This lock protects the following four fields */ + ThrottleState ts; + QLIST_HEAD(, BlockDriverState) head; + BlockDriverState *tokens[2]; + bool any_timer_armed[2]; + + /* These two are protected by the global throttle_groups_lock */ + unsigned refcount; + QTAILQ_ENTRY(ThrottleGroup) list; +} ThrottleGroup; + +static QemuMutex throttle_groups_lock; +static QTAILQ_HEAD(, ThrottleGroup) throttle_groups = + QTAILQ_HEAD_INITIALIZER(throttle_groups); + +/* Increments the reference count of a ThrottleGroup given its name. + * + * If no ThrottleGroup is found with the given name a new one is + * created. + * + * @name: the name of the ThrottleGroup + * @ret: the ThrottleGroup + */ +static ThrottleGroup *throttle_group_incref(const char *name) +{ + ThrottleGroup *tg = NULL; + ThrottleGroup *iter; + + qemu_mutex_lock(&throttle_groups_lock); + + /* Look for an existing group with that name */ + QTAILQ_FOREACH(iter, &throttle_groups, list) { + if (!strcmp(name, iter->name)) { + tg = iter; + break; + } + } + + /* Create a new one if not found */ + if (!tg) { + tg = g_new0(ThrottleGroup, 1); + tg->name = g_strdup(name); + qemu_mutex_init(&tg->lock); + throttle_init(&tg->ts); + QLIST_INIT(&tg->head); + + QTAILQ_INSERT_TAIL(&throttle_groups, tg, list); + } + + tg->refcount++; + + qemu_mutex_unlock(&throttle_groups_lock); + + return tg; +} + +/* Decrease the reference count of a ThrottleGroup. + * + * When the reference count reaches zero the ThrottleGroup is + * destroyed. + * + * @tg: The ThrottleGroup to unref + */ +static void throttle_group_unref(ThrottleGroup *tg) +{ + qemu_mutex_lock(&throttle_groups_lock); + if (--tg->refcount == 0) { + QTAILQ_REMOVE(&throttle_groups, tg, list); + qemu_mutex_destroy(&tg->lock); + g_free(tg->name); + g_free(tg); + } + qemu_mutex_unlock(&throttle_groups_lock); +} + +/* Get the name from a BlockDriverState's ThrottleGroup. The name (and + * the pointer) is guaranteed to remain constant during the lifetime + * of the group. + * + * @bs: a BlockDriverState that is member of a throttling group + * @ret: the name of the group. + */ +const char *throttle_group_get_name(BlockDriverState *bs) +{ + ThrottleGroup *tg = container_of(bs->throttle_state, ThrottleGroup, ts); + return tg->name; +} + +/* Return the next BlockDriverState in the round-robin sequence, + * simulating a circular list. + * + * This assumes that tg->lock is held. + * + * @bs: the current BlockDriverState + * @ret: the next BlockDriverState in the sequence + */ +static BlockDriverState *throttle_group_next_bs(BlockDriverState *bs) +{ + ThrottleState *ts = bs->throttle_state; + ThrottleGroup *tg = container_of(ts, ThrottleGroup, ts); + BlockDriverState *next = QLIST_NEXT(bs, round_robin); + + if (!next) { + return QLIST_FIRST(&tg->head); + } + + return next; +} + +/* Return the next BlockDriverState in the round-robin sequence with + * pending I/O requests. + * + * This assumes that tg->lock is held. + * + * @bs: the current BlockDriverState + * @is_write: the type of operation (read/write) + * @ret: the next BlockDriverState with pending requests, or bs + * if there is none. + */ +static BlockDriverState *next_throttle_token(BlockDriverState *bs, + bool is_write) +{ + ThrottleGroup *tg = container_of(bs->throttle_state, ThrottleGroup, ts); + BlockDriverState *token, *start; + + start = token = tg->tokens[is_write]; + + /* get next bs round in round robin style */ + token = throttle_group_next_bs(token); + while (token != start && !token->pending_reqs[is_write]) { + token = throttle_group_next_bs(token); + } + + /* If no IO are queued for scheduling on the next round robin token + * then decide the token is the current bs because chances are + * the current bs get the current request queued. + */ + if (token == start && !token->pending_reqs[is_write]) { + token = bs; + } + + return token; +} + +/* Check if the next I/O request for a BlockDriverState needs to be + * throttled or not. If there's no timer set in this group, set one + * and update the token accordingly. + * + * This assumes that tg->lock is held. + * + * @bs: the current BlockDriverState + * @is_write: the type of operation (read/write) + * @ret: whether the I/O request needs to be throttled or not + */ +static bool throttle_group_schedule_timer(BlockDriverState *bs, + bool is_write) +{ + ThrottleState *ts = bs->throttle_state; + ThrottleTimers *tt = &bs->throttle_timers; + ThrottleGroup *tg = container_of(ts, ThrottleGroup, ts); + bool must_wait; + + /* Check if any of the timers in this group is already armed */ + if (tg->any_timer_armed[is_write]) { + return true; + } + + must_wait = throttle_schedule_timer(ts, tt, is_write); + + /* If a timer just got armed, set bs as the current token */ + if (must_wait) { + tg->tokens[is_write] = bs; + tg->any_timer_armed[is_write] = true; + } + + return must_wait; +} + +/* Look for the next pending I/O request and schedule it. + * + * This assumes that tg->lock is held. + * + * @bs: the current BlockDriverState + * @is_write: the type of operation (read/write) + */ +static void schedule_next_request(BlockDriverState *bs, bool is_write) +{ + ThrottleGroup *tg = container_of(bs->throttle_state, ThrottleGroup, ts); + bool must_wait; + BlockDriverState *token; + + /* Check if there's any pending request to schedule next */ + token = next_throttle_token(bs, is_write); + if (!token->pending_reqs[is_write]) { + return; + } + + /* Set a timer for the request if it needs to be throttled */ + must_wait = throttle_group_schedule_timer(token, is_write); + + /* If it doesn't have to wait, queue it for immediate execution */ + if (!must_wait) { + /* Give preference to requests from the current bs */ + if (qemu_in_coroutine() && + qemu_co_queue_next(&bs->throttled_reqs[is_write])) { + token = bs; + } else { + ThrottleTimers *tt = &token->throttle_timers; + int64_t now = qemu_clock_get_ns(tt->clock_type); + timer_mod(tt->timers[is_write], now + 1); + tg->any_timer_armed[is_write] = true; + } + tg->tokens[is_write] = token; + } +} + +/* Check if an I/O request needs to be throttled, wait and set a timer + * if necessary, and schedule the next request using a round robin + * algorithm. + * + * @bs: the current BlockDriverState + * @bytes: the number of bytes for this I/O + * @is_write: the type of operation (read/write) + */ +void coroutine_fn throttle_group_co_io_limits_intercept(BlockDriverState *bs, + unsigned int bytes, + bool is_write) +{ + bool must_wait; + BlockDriverState *token; + + ThrottleGroup *tg = container_of(bs->throttle_state, ThrottleGroup, ts); + qemu_mutex_lock(&tg->lock); + + /* First we check if this I/O has to be throttled. */ + token = next_throttle_token(bs, is_write); + must_wait = throttle_group_schedule_timer(token, is_write); + + /* Wait if there's a timer set or queued requests of this type */ + if (must_wait || bs->pending_reqs[is_write]) { + bs->pending_reqs[is_write]++; + qemu_mutex_unlock(&tg->lock); + qemu_co_queue_wait(&bs->throttled_reqs[is_write]); + qemu_mutex_lock(&tg->lock); + bs->pending_reqs[is_write]--; + } + + /* The I/O will be executed, so do the accounting */ + throttle_account(bs->throttle_state, is_write, bytes); + + /* Schedule the next request */ + schedule_next_request(bs, is_write); + + qemu_mutex_unlock(&tg->lock); +} + +/* Update the throttle configuration for a particular group. Similar + * to throttle_config(), but guarantees atomicity within the + * throttling group. + * + * @bs: a BlockDriverState that is member of the group + * @cfg: the configuration to set + */ +void throttle_group_config(BlockDriverState *bs, ThrottleConfig *cfg) +{ + ThrottleTimers *tt = &bs->throttle_timers; + ThrottleState *ts = bs->throttle_state; + ThrottleGroup *tg = container_of(ts, ThrottleGroup, ts); + qemu_mutex_lock(&tg->lock); + /* throttle_config() cancels the timers */ + if (timer_pending(tt->timers[0])) { + tg->any_timer_armed[0] = false; + } + if (timer_pending(tt->timers[1])) { + tg->any_timer_armed[1] = false; + } + throttle_config(ts, tt, cfg); + qemu_mutex_unlock(&tg->lock); +} + +/* Get the throttle configuration from a particular group. Similar to + * throttle_get_config(), but guarantees atomicity within the + * throttling group. + * + * @bs: a BlockDriverState that is member of the group + * @cfg: the configuration will be written here + */ +void throttle_group_get_config(BlockDriverState *bs, ThrottleConfig *cfg) +{ + ThrottleState *ts = bs->throttle_state; + ThrottleGroup *tg = container_of(ts, ThrottleGroup, ts); + qemu_mutex_lock(&tg->lock); + throttle_get_config(ts, cfg); + qemu_mutex_unlock(&tg->lock); +} + +/* ThrottleTimers callback. This wakes up a request that was waiting + * because it had been throttled. + * + * @bs: the BlockDriverState whose request had been throttled + * @is_write: the type of operation (read/write) + */ +static void timer_cb(BlockDriverState *bs, bool is_write) +{ + ThrottleState *ts = bs->throttle_state; + ThrottleGroup *tg = container_of(ts, ThrottleGroup, ts); + bool empty_queue; + + /* The timer has just been fired, so we can update the flag */ + qemu_mutex_lock(&tg->lock); + tg->any_timer_armed[is_write] = false; + qemu_mutex_unlock(&tg->lock); + + /* Run the request that was waiting for this timer */ + empty_queue = !qemu_co_enter_next(&bs->throttled_reqs[is_write]); + + /* If the request queue was empty then we have to take care of + * scheduling the next one */ + if (empty_queue) { + qemu_mutex_lock(&tg->lock); + schedule_next_request(bs, is_write); + qemu_mutex_unlock(&tg->lock); + } +} + +static void read_timer_cb(void *opaque) +{ + timer_cb(opaque, false); +} + +static void write_timer_cb(void *opaque) +{ + timer_cb(opaque, true); +} + +/* Register a BlockDriverState in the throttling group, also + * initializing its timers and updating its throttle_state pointer to + * point to it. If a throttling group with that name does not exist + * yet, it will be created. + * + * @bs: the BlockDriverState to insert + * @groupname: the name of the group + */ +void throttle_group_register_bs(BlockDriverState *bs, const char *groupname) +{ + int i; + ThrottleGroup *tg = throttle_group_incref(groupname); + int clock_type = QEMU_CLOCK_REALTIME; + + if (qtest_enabled()) { + /* For testing block IO throttling only */ + clock_type = QEMU_CLOCK_VIRTUAL; + } + + bs->throttle_state = &tg->ts; + + qemu_mutex_lock(&tg->lock); + /* If the ThrottleGroup is new set this BlockDriverState as the token */ + for (i = 0; i < 2; i++) { + if (!tg->tokens[i]) { + tg->tokens[i] = bs; + } + } + + QLIST_INSERT_HEAD(&tg->head, bs, round_robin); + + throttle_timers_init(&bs->throttle_timers, + bdrv_get_aio_context(bs), + clock_type, + read_timer_cb, + write_timer_cb, + bs); + + qemu_mutex_unlock(&tg->lock); +} + +/* Unregister a BlockDriverState from its group, removing it from the + * list, destroying the timers and setting the throttle_state pointer + * to NULL. + * + * The group will be destroyed if it's empty after this operation. + * + * @bs: the BlockDriverState to remove + */ +void throttle_group_unregister_bs(BlockDriverState *bs) +{ + ThrottleGroup *tg = container_of(bs->throttle_state, ThrottleGroup, ts); + int i; + + qemu_mutex_lock(&tg->lock); + for (i = 0; i < 2; i++) { + if (tg->tokens[i] == bs) { + BlockDriverState *token = throttle_group_next_bs(bs); + /* Take care of the case where this is the last bs in the group */ + if (token == bs) { + token = NULL; + } + tg->tokens[i] = token; + } + } + + /* remove the current bs from the list */ + QLIST_REMOVE(bs, round_robin); + throttle_timers_destroy(&bs->throttle_timers); + qemu_mutex_unlock(&tg->lock); + + throttle_group_unref(tg); + bs->throttle_state = NULL; +} + +/* Acquire the lock of this throttling group. + * + * You won't normally need to use this. None of the functions from the + * ThrottleGroup API require you to acquire the lock since all of them + * deal with it internally. + * + * This should only be used in exceptional cases when you want to + * access the protected fields of a BlockDriverState directly + * (e.g. bdrv_swap()). + * + * @bs: a BlockDriverState that is member of the group + */ +void throttle_group_lock(BlockDriverState *bs) +{ + ThrottleGroup *tg = container_of(bs->throttle_state, ThrottleGroup, ts); + qemu_mutex_lock(&tg->lock); +} + +/* Release the lock of this throttling group. + * + * See the comments in throttle_group_lock(). + */ +void throttle_group_unlock(BlockDriverState *bs) +{ + ThrottleGroup *tg = container_of(bs->throttle_state, ThrottleGroup, ts); + qemu_mutex_unlock(&tg->lock); +} + +static void throttle_groups_init(void) +{ + qemu_mutex_init(&throttle_groups_lock); +} + +block_init(throttle_groups_init);