X-Git-Url: https://gerrit.opnfv.org/gerrit/gitweb?a=blobdiff_plain;f=kernel%2Fdrivers%2Fstaging%2Flustre%2Flustre%2Fllite%2Fllite_capa.c;fp=kernel%2Fdrivers%2Fstaging%2Flustre%2Flustre%2Fllite%2Fllite_capa.c;h=aec9a44120c025aec0f5efb73b7c4fe3ca0c95a8;hb=9ca8dbcc65cfc63d6f5ef3312a33184e1d726e00;hp=0000000000000000000000000000000000000000;hpb=98260f3884f4a202f9ca5eabed40b1354c489b29;p=kvmfornfv.git diff --git a/kernel/drivers/staging/lustre/lustre/llite/llite_capa.c b/kernel/drivers/staging/lustre/lustre/llite/llite_capa.c new file mode 100644 index 000000000..aec9a4412 --- /dev/null +++ b/kernel/drivers/staging/lustre/lustre/llite/llite_capa.c @@ -0,0 +1,654 @@ +/* + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + * GPL HEADER END + */ +/* + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Use is subject to license terms. + * + * Copyright (c) 2011, 2012, Intel Corporation. + */ +/* + * This file is part of Lustre, http://www.lustre.org/ + * Lustre is a trademark of Sun Microsystems, Inc. + * + * lustre/llite/llite_capa.c + * + * Author: Lai Siyao + */ + +#define DEBUG_SUBSYSTEM S_LLITE + +#include +#include +#include +#include + +#include "../include/lustre_lite.h" +#include "llite_internal.h" + +/* for obd_capa.c_list, client capa might stay in three places: + * 1. ll_capa_list. + * 2. ll_idle_capas. + * 3. stand alone: just allocated. + */ + +/* capas for oss writeback and those failed to renew */ +static LIST_HEAD(ll_idle_capas); +static struct ptlrpc_thread ll_capa_thread; +static struct list_head *ll_capa_list = &capa_list[CAPA_SITE_CLIENT]; + +/* llite capa renewal timer */ +struct timer_list ll_capa_timer; +/* for debug: indicate whether capa on llite is enabled or not */ +static atomic_t ll_capa_debug = ATOMIC_INIT(0); +static unsigned long long ll_capa_renewed; +static unsigned long long ll_capa_renewal_noent; +static unsigned long long ll_capa_renewal_failed; +static unsigned long long ll_capa_renewal_retries; + +static int ll_update_capa(struct obd_capa *ocapa, struct lustre_capa *capa); + +static inline void update_capa_timer(struct obd_capa *ocapa, unsigned long expiry) +{ + if (time_before(expiry, ll_capa_timer.expires) || + !timer_pending(&ll_capa_timer)) { + mod_timer(&ll_capa_timer, expiry); + DEBUG_CAPA(D_SEC, &ocapa->c_capa, + "ll_capa_timer update: %lu/%lu by", expiry, jiffies); + } +} + +static inline unsigned long capa_renewal_time(struct obd_capa *ocapa) +{ + return cfs_time_sub(ocapa->c_expiry, + cfs_time_seconds(ocapa->c_capa.lc_timeout) / 2); +} + +static inline int capa_is_to_expire(struct obd_capa *ocapa) +{ + return time_before_eq(capa_renewal_time(ocapa), cfs_time_current()); +} + +static inline int have_expired_capa(void) +{ + struct obd_capa *ocapa = NULL; + int expired = 0; + + /* if ll_capa_list has client capa to expire or ll_idle_capas has + * expired capa, return 1. + */ + spin_lock(&capa_lock); + if (!list_empty(ll_capa_list)) { + ocapa = list_entry(ll_capa_list->next, struct obd_capa, + c_list); + expired = capa_is_to_expire(ocapa); + if (!expired) + update_capa_timer(ocapa, capa_renewal_time(ocapa)); + } else if (!list_empty(&ll_idle_capas)) { + ocapa = list_entry(ll_idle_capas.next, struct obd_capa, + c_list); + expired = capa_is_expired(ocapa); + if (!expired) + update_capa_timer(ocapa, ocapa->c_expiry); + } + spin_unlock(&capa_lock); + + if (expired) + DEBUG_CAPA(D_SEC, &ocapa->c_capa, "expired"); + return expired; +} + +static void sort_add_capa(struct obd_capa *ocapa, struct list_head *head) +{ + struct obd_capa *tmp; + struct list_head *before = NULL; + + /* TODO: client capa is sorted by expiry, this could be optimized */ + list_for_each_entry_reverse(tmp, head, c_list) { + if (cfs_time_aftereq(ocapa->c_expiry, tmp->c_expiry)) { + before = &tmp->c_list; + break; + } + } + + LASSERT(&ocapa->c_list != before); + list_add(&ocapa->c_list, before ?: head); +} + +static inline int obd_capa_open_count(struct obd_capa *oc) +{ + struct ll_inode_info *lli = ll_i2info(oc->u.cli.inode); + return atomic_read(&lli->lli_open_count); +} + +static void ll_delete_capa(struct obd_capa *ocapa) +{ + struct ll_inode_info *lli = ll_i2info(ocapa->u.cli.inode); + + if (capa_for_mds(&ocapa->c_capa)) { + LASSERT(lli->lli_mds_capa == ocapa); + lli->lli_mds_capa = NULL; + } else if (capa_for_oss(&ocapa->c_capa)) { + list_del_init(&ocapa->u.cli.lli_list); + } + + DEBUG_CAPA(D_SEC, &ocapa->c_capa, "free client"); + list_del_init(&ocapa->c_list); + capa_count[CAPA_SITE_CLIENT]--; + /* release the ref when alloc */ + capa_put(ocapa); +} + +/* three places where client capa is deleted: + * 1. capa_thread_main(), main place to delete expired capa. + * 2. ll_clear_inode_capas() in ll_clear_inode(). + * 3. ll_truncate_free_capa() delete truncate capa explicitly in ll_setattr_ost(). + */ +static int capa_thread_main(void *unused) +{ + struct obd_capa *ocapa, *tmp, *next; + struct inode *inode = NULL; + struct l_wait_info lwi = { 0 }; + int rc; + + thread_set_flags(&ll_capa_thread, SVC_RUNNING); + wake_up(&ll_capa_thread.t_ctl_waitq); + + while (1) { + l_wait_event(ll_capa_thread.t_ctl_waitq, + !thread_is_running(&ll_capa_thread) || + have_expired_capa(), + &lwi); + + if (!thread_is_running(&ll_capa_thread)) + break; + + next = NULL; + + spin_lock(&capa_lock); + list_for_each_entry_safe(ocapa, tmp, ll_capa_list, c_list) { + __u64 ibits; + + LASSERT(ocapa->c_capa.lc_opc != CAPA_OPC_OSS_TRUNC); + + if (!capa_is_to_expire(ocapa)) { + next = ocapa; + break; + } + + list_del_init(&ocapa->c_list); + + /* for MDS capability, only renew those which belong to + * dir, or its inode is opened, or client holds LOOKUP + * lock. + */ + /* ibits may be changed by ll_have_md_lock() so we have + * to set it each time */ + ibits = MDS_INODELOCK_LOOKUP; + if (capa_for_mds(&ocapa->c_capa) && + !S_ISDIR(ocapa->u.cli.inode->i_mode) && + obd_capa_open_count(ocapa) == 0 && + !ll_have_md_lock(ocapa->u.cli.inode, + &ibits, LCK_MINMODE)) { + DEBUG_CAPA(D_SEC, &ocapa->c_capa, + "skip renewal for"); + sort_add_capa(ocapa, &ll_idle_capas); + continue; + } + + /* for OSS capability, only renew those whose inode is + * opened. + */ + if (capa_for_oss(&ocapa->c_capa) && + obd_capa_open_count(ocapa) == 0) { + /* oss capa with open count == 0 won't renew, + * move to idle list */ + sort_add_capa(ocapa, &ll_idle_capas); + continue; + } + + /* NB iput() is in ll_update_capa() */ + inode = igrab(ocapa->u.cli.inode); + if (inode == NULL) { + DEBUG_CAPA(D_ERROR, &ocapa->c_capa, + "igrab failed for"); + continue; + } + + capa_get(ocapa); + ll_capa_renewed++; + spin_unlock(&capa_lock); + rc = md_renew_capa(ll_i2mdexp(inode), ocapa, + ll_update_capa); + spin_lock(&capa_lock); + if (rc) { + DEBUG_CAPA(D_ERROR, &ocapa->c_capa, + "renew failed: %d", rc); + ll_capa_renewal_failed++; + } + } + + if (next) + update_capa_timer(next, capa_renewal_time(next)); + + list_for_each_entry_safe(ocapa, tmp, &ll_idle_capas, + c_list) { + if (!capa_is_expired(ocapa)) { + if (!next) + update_capa_timer(ocapa, + ocapa->c_expiry); + break; + } + + if (atomic_read(&ocapa->c_refc) > 1) { + DEBUG_CAPA(D_SEC, &ocapa->c_capa, + "expired(c_refc %d), don't release", + atomic_read(&ocapa->c_refc)); + /* don't try to renew any more */ + list_del_init(&ocapa->c_list); + continue; + } + + /* expired capa is released. */ + DEBUG_CAPA(D_SEC, &ocapa->c_capa, "release expired"); + ll_delete_capa(ocapa); + } + + spin_unlock(&capa_lock); + } + + thread_set_flags(&ll_capa_thread, SVC_STOPPED); + wake_up(&ll_capa_thread.t_ctl_waitq); + return 0; +} + +void ll_capa_timer_callback(unsigned long unused) +{ + wake_up(&ll_capa_thread.t_ctl_waitq); +} + +int ll_capa_thread_start(void) +{ + struct task_struct *task; + + init_waitqueue_head(&ll_capa_thread.t_ctl_waitq); + + task = kthread_run(capa_thread_main, NULL, "ll_capa"); + if (IS_ERR(task)) { + CERROR("cannot start expired capa thread: rc %ld\n", + PTR_ERR(task)); + return PTR_ERR(task); + } + wait_event(ll_capa_thread.t_ctl_waitq, + thread_is_running(&ll_capa_thread)); + + return 0; +} + +void ll_capa_thread_stop(void) +{ + thread_set_flags(&ll_capa_thread, SVC_STOPPING); + wake_up(&ll_capa_thread.t_ctl_waitq); + wait_event(ll_capa_thread.t_ctl_waitq, + thread_is_stopped(&ll_capa_thread)); +} + +struct obd_capa *ll_osscapa_get(struct inode *inode, __u64 opc) +{ + struct ll_inode_info *lli = ll_i2info(inode); + struct obd_capa *ocapa; + int found = 0; + + if ((ll_i2sbi(inode)->ll_flags & LL_SBI_OSS_CAPA) == 0) + return NULL; + + LASSERT(opc == CAPA_OPC_OSS_WRITE || opc == CAPA_OPC_OSS_RW || + opc == CAPA_OPC_OSS_TRUNC); + + spin_lock(&capa_lock); + list_for_each_entry(ocapa, &lli->lli_oss_capas, u.cli.lli_list) { + if (capa_is_expired(ocapa)) + continue; + if ((opc & CAPA_OPC_OSS_WRITE) && + capa_opc_supported(&ocapa->c_capa, CAPA_OPC_OSS_WRITE)) { + found = 1; + break; + } else if ((opc & CAPA_OPC_OSS_READ) && + capa_opc_supported(&ocapa->c_capa, + CAPA_OPC_OSS_READ)) { + found = 1; + break; + } else if ((opc & CAPA_OPC_OSS_TRUNC) && + capa_opc_supported(&ocapa->c_capa, opc)) { + found = 1; + break; + } + } + + if (found) { + LASSERT(lu_fid_eq(capa_fid(&ocapa->c_capa), + ll_inode2fid(inode))); + LASSERT(ocapa->c_site == CAPA_SITE_CLIENT); + + capa_get(ocapa); + + DEBUG_CAPA(D_SEC, &ocapa->c_capa, "found client"); + } else { + ocapa = NULL; + + if (atomic_read(&ll_capa_debug)) { + CERROR("no capability for "DFID" opc %#llx\n", + PFID(&lli->lli_fid), opc); + atomic_set(&ll_capa_debug, 0); + } + } + spin_unlock(&capa_lock); + + return ocapa; +} +EXPORT_SYMBOL(ll_osscapa_get); + +struct obd_capa *ll_mdscapa_get(struct inode *inode) +{ + struct ll_inode_info *lli = ll_i2info(inode); + struct obd_capa *ocapa; + + LASSERT(inode != NULL); + + if ((ll_i2sbi(inode)->ll_flags & LL_SBI_MDS_CAPA) == 0) + return NULL; + + spin_lock(&capa_lock); + ocapa = capa_get(lli->lli_mds_capa); + spin_unlock(&capa_lock); + if (!ocapa && atomic_read(&ll_capa_debug)) { + CERROR("no mds capability for "DFID"\n", PFID(&lli->lli_fid)); + atomic_set(&ll_capa_debug, 0); + } + + return ocapa; +} + +static struct obd_capa *do_add_mds_capa(struct inode *inode, + struct obd_capa *ocapa) +{ + struct ll_inode_info *lli = ll_i2info(inode); + struct obd_capa *old = lli->lli_mds_capa; + struct lustre_capa *capa = &ocapa->c_capa; + + if (!old) { + ocapa->u.cli.inode = inode; + lli->lli_mds_capa = ocapa; + capa_count[CAPA_SITE_CLIENT]++; + + DEBUG_CAPA(D_SEC, capa, "add MDS"); + } else { + spin_lock(&old->c_lock); + old->c_capa = *capa; + spin_unlock(&old->c_lock); + + DEBUG_CAPA(D_SEC, capa, "update MDS"); + + capa_put(ocapa); + ocapa = old; + } + return ocapa; +} + +static struct obd_capa *do_lookup_oss_capa(struct inode *inode, int opc) +{ + struct ll_inode_info *lli = ll_i2info(inode); + struct obd_capa *ocapa; + + /* inside capa_lock */ + list_for_each_entry(ocapa, &lli->lli_oss_capas, u.cli.lli_list) { + if ((capa_opc(&ocapa->c_capa) & opc) != opc) + continue; + + LASSERT(lu_fid_eq(capa_fid(&ocapa->c_capa), + ll_inode2fid(inode))); + LASSERT(ocapa->c_site == CAPA_SITE_CLIENT); + + DEBUG_CAPA(D_SEC, &ocapa->c_capa, "found client"); + return ocapa; + } + + return NULL; +} + +static inline void inode_add_oss_capa(struct inode *inode, + struct obd_capa *ocapa) +{ + struct ll_inode_info *lli = ll_i2info(inode); + struct obd_capa *tmp; + struct list_head *next = NULL; + + /* capa is sorted in lli_oss_capas so lookup can always find the + * latest one */ + list_for_each_entry(tmp, &lli->lli_oss_capas, u.cli.lli_list) { + if (cfs_time_after(ocapa->c_expiry, tmp->c_expiry)) { + next = &tmp->u.cli.lli_list; + break; + } + } + LASSERT(&ocapa->u.cli.lli_list != next); + list_move_tail(&ocapa->u.cli.lli_list, next ?: &lli->lli_oss_capas); +} + +static struct obd_capa *do_add_oss_capa(struct inode *inode, + struct obd_capa *ocapa) +{ + struct obd_capa *old; + struct lustre_capa *capa = &ocapa->c_capa; + + LASSERTF(S_ISREG(inode->i_mode), + "inode has oss capa, but not regular file, mode: %d\n", + inode->i_mode); + + /* FIXME: can't replace it so easily with fine-grained opc */ + old = do_lookup_oss_capa(inode, capa_opc(capa) & CAPA_OPC_OSS_ONLY); + if (!old) { + ocapa->u.cli.inode = inode; + INIT_LIST_HEAD(&ocapa->u.cli.lli_list); + capa_count[CAPA_SITE_CLIENT]++; + + DEBUG_CAPA(D_SEC, capa, "add OSS"); + } else { + spin_lock(&old->c_lock); + old->c_capa = *capa; + spin_unlock(&old->c_lock); + + DEBUG_CAPA(D_SEC, capa, "update OSS"); + + capa_put(ocapa); + ocapa = old; + } + + inode_add_oss_capa(inode, ocapa); + return ocapa; +} + +struct obd_capa *ll_add_capa(struct inode *inode, struct obd_capa *ocapa) +{ + spin_lock(&capa_lock); + ocapa = capa_for_mds(&ocapa->c_capa) ? do_add_mds_capa(inode, ocapa) : + do_add_oss_capa(inode, ocapa); + + /* truncate capa won't renew */ + if (ocapa->c_capa.lc_opc != CAPA_OPC_OSS_TRUNC) { + set_capa_expiry(ocapa); + list_del_init(&ocapa->c_list); + sort_add_capa(ocapa, ll_capa_list); + + update_capa_timer(ocapa, capa_renewal_time(ocapa)); + } + + spin_unlock(&capa_lock); + + atomic_set(&ll_capa_debug, 1); + return ocapa; +} + +static inline void delay_capa_renew(struct obd_capa *oc, unsigned long delay) +{ + /* NB: set a fake expiry for this capa to prevent it renew too soon */ + oc->c_expiry = cfs_time_add(oc->c_expiry, cfs_time_seconds(delay)); +} + +static int ll_update_capa(struct obd_capa *ocapa, struct lustre_capa *capa) +{ + struct inode *inode = ocapa->u.cli.inode; + int rc = 0; + + LASSERT(ocapa); + + if (IS_ERR(capa)) { + /* set error code */ + rc = PTR_ERR(capa); + spin_lock(&capa_lock); + if (rc == -ENOENT) { + DEBUG_CAPA(D_SEC, &ocapa->c_capa, + "renewal canceled because object removed"); + ll_capa_renewal_noent++; + } else { + ll_capa_renewal_failed++; + + /* failed capa won't be renewed any longer, but if -EIO, + * client might be doing recovery, retry in 2 min. */ + if (rc == -EIO && !capa_is_expired(ocapa)) { + delay_capa_renew(ocapa, 120); + DEBUG_CAPA(D_ERROR, &ocapa->c_capa, + "renewal failed: -EIO, retry in 2 mins"); + ll_capa_renewal_retries++; + goto retry; + } else { + DEBUG_CAPA(D_ERROR, &ocapa->c_capa, + "renewal failed(rc: %d) for", rc); + } + } + + list_del_init(&ocapa->c_list); + sort_add_capa(ocapa, &ll_idle_capas); + spin_unlock(&capa_lock); + + capa_put(ocapa); + iput(inode); + return rc; + } + + spin_lock(&ocapa->c_lock); + LASSERT(!memcmp(&ocapa->c_capa, capa, + offsetof(struct lustre_capa, lc_opc))); + ocapa->c_capa = *capa; + set_capa_expiry(ocapa); + spin_unlock(&ocapa->c_lock); + + spin_lock(&capa_lock); + if (capa_for_oss(capa)) + inode_add_oss_capa(inode, ocapa); + DEBUG_CAPA(D_SEC, capa, "renew"); +retry: + list_del_init(&ocapa->c_list); + sort_add_capa(ocapa, ll_capa_list); + update_capa_timer(ocapa, capa_renewal_time(ocapa)); + spin_unlock(&capa_lock); + + capa_put(ocapa); + iput(inode); + return rc; +} + +void ll_capa_open(struct inode *inode) +{ + struct ll_inode_info *lli = ll_i2info(inode); + + if ((ll_i2sbi(inode)->ll_flags & (LL_SBI_MDS_CAPA | LL_SBI_OSS_CAPA)) + == 0) + return; + + if (!S_ISREG(inode->i_mode)) + return; + + atomic_inc(&lli->lli_open_count); +} + +void ll_capa_close(struct inode *inode) +{ + struct ll_inode_info *lli = ll_i2info(inode); + + if ((ll_i2sbi(inode)->ll_flags & (LL_SBI_MDS_CAPA | LL_SBI_OSS_CAPA)) + == 0) + return; + + if (!S_ISREG(inode->i_mode)) + return; + + atomic_dec(&lli->lli_open_count); +} + +/* delete CAPA_OPC_OSS_TRUNC only */ +void ll_truncate_free_capa(struct obd_capa *ocapa) +{ + if (!ocapa) + return; + + LASSERT(ocapa->c_capa.lc_opc & CAPA_OPC_OSS_TRUNC); + DEBUG_CAPA(D_SEC, &ocapa->c_capa, "free truncate"); + + /* release ref when find */ + capa_put(ocapa); + if (likely(ocapa->c_capa.lc_opc == CAPA_OPC_OSS_TRUNC)) { + spin_lock(&capa_lock); + ll_delete_capa(ocapa); + spin_unlock(&capa_lock); + } +} + +void ll_clear_inode_capas(struct inode *inode) +{ + struct ll_inode_info *lli = ll_i2info(inode); + struct obd_capa *ocapa, *tmp; + + spin_lock(&capa_lock); + ocapa = lli->lli_mds_capa; + if (ocapa) + ll_delete_capa(ocapa); + + list_for_each_entry_safe(ocapa, tmp, &lli->lli_oss_capas, + u.cli.lli_list) + ll_delete_capa(ocapa); + spin_unlock(&capa_lock); +} + +void ll_print_capa_stat(struct ll_sb_info *sbi) +{ + if (sbi->ll_flags & (LL_SBI_MDS_CAPA | LL_SBI_OSS_CAPA)) + LCONSOLE_INFO("Fid capabilities renewed: %llu\n" + "Fid capabilities renewal ENOENT: %llu\n" + "Fid capabilities failed to renew: %llu\n" + "Fid capabilities renewal retries: %llu\n", + ll_capa_renewed, ll_capa_renewal_noent, + ll_capa_renewal_failed, ll_capa_renewal_retries); +}