1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
4 * Ceph - scalable distributed file system
6 * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
19 #include "osd/OSDMap.h"
22 #include "messages/MOSDOp.h"
23 #include "messages/MOSDOpReply.h"
24 #include "messages/MOSDMap.h"
26 #include "msg/Messenger.h"
28 #include "include/Context.h"
30 #include "common/Finisher.h"
31 #include "common/config.h"
33 #define dout_subsys ceph_subsys_filer
35 #define dout_prefix *_dout << objecter->messenger->get_myname() << ".filer "
// Completion context for one per-object probe request.  Filer::_probe()
// creates one instance per object and passes the addresses of its `size` and
// `mtime` members to objecter->stat(); finish() then forwards those values to
// Filer::_probed().
37 class Filer::C_Probe : public Context {
// Object modification time; its address is handed to objecter->stat() by
// Filer::_probe().
43 ceph::real_time mtime;
// f: owning Filer, p: probe state shared by all requests of this probe,
// o: the object this request is about.
44 C_Probe(Filer *f, Probe *p, object_t o) : filer(f), probe(p), oid(o),
// Invoked with the result code r of the request.
46 void finish(int r) override {
// Take probe->lock before handing the result to _probed().
54 Probe::unique_lock pl(probe->lock);
// _probed() receives the lock by reference and must have released it by the
// time it returns (asserted below).  Its return value says whether the whole
// probe is complete.
59 probe_complete = filer->_probed(probe, oid, size, mtime, pl);
60 assert(!pl.owns_lock());
// Notify the probe's completion callback with the recorded error code.
63 probe->onfinish->complete(probe->err);
// Start a probe on inode `ino`, beginning at `start_from`, in the direction
// selected by `fwd`.  Logs the request, allocates a Probe describing it and
// returns whatever probe_impl() returns.
//
// This overload takes the mtime out-parameter as a ceph::real_time pointer;
// it is stored in the Probe, and _probed() writes the newest mtime it has
// seen through it when it is non-null.
69 int Filer::probe(inodeno_t ino,
70 file_layout_t *layout,
73 uint64_t *end, // LB, when !fwd
74 ceph::real_time *pmtime,
79 ldout(cct, 10) << "probe " << (fwd ? "fwd ":"bwd ")
81 << " starting from " << start_from
// A zero snapid is rejected outright.
84 assert(snapid); // (until there is a non-NOSNAP write)
// The Probe carries all per-probe state and takes a copy of *layout.
86 Probe *probe = new Probe(ino, *layout, snapid, start_from, end, pmtime,
87 flags, fwd, onfinish);
89 return probe_impl(probe, layout, start_from, end);
// Second overload of probe().  The body follows the same steps as the
// overload above: log the request, assert a non-zero snapid, allocate a Probe
// and delegate to probe_impl().
// NOTE(review): presumably this overload differs only in the type of the
// mtime out-parameter (cf. probe->pumtime in _probed()) -- confirm against the
// declaration.
92 int Filer::probe(inodeno_t ino,
93 file_layout_t *layout,
96 uint64_t *end, // LB, when !fwd
102 ldout(cct, 10) << "probe " << (fwd ? "fwd ":"bwd ")
104 << " starting from " << start_from
// A zero snapid is rejected outright.
107 assert(snapid); // (until there is a non-NOSNAP write)
// The Probe carries all per-probe state and takes a copy of *layout.
109 Probe *probe = new Probe(ino, *layout, snapid, start_from, end, pmtime,
110 flags, fwd, onfinish);
111 return probe_impl(probe, layout, start_from, end);
// Shared back end of both probe() overloads: sizes the first probing window
// from the layout's period and the starting offset, then takes probe->lock
// for the first round of probing.
114 int Filer::probe_impl(Probe* probe, file_layout_t *layout,
115 uint64_t start_from, uint64_t *end) // LB, when !fwd
117 // period (bytes before we jump onto a new set of object(s))
118 uint64_t period = layout->get_period();
120 // start with 1+ periods.
121 probe->probing_len = period;
// Unaligned start: grow the window by the distance from start_from to the
// next period boundary, so that start_from + probing_len is a multiple of
// period.
123 if (start_from % period)
124 probe->probing_len += period - (start_from % period);
// This variant requires the starting point to lie strictly above *end
// (the "LB, when !fwd" of the signature).
126 assert(start_from > *end);
// Unaligned start: shrink the window to start_from % period bytes ...
127 if (start_from % period)
128 probe->probing_len -= period - (start_from % period);
// ... and move the window start back by the window length.
// NOTE(review): presumably probing_off was initialised to start_from by the
// Probe constructor, which makes the new offset period-aligned -- confirm.
129 probe->probing_off -= probe->probing_len;
// Take the probe lock; by the time the assert below runs it must have been
// released again.
132 Probe::unique_lock pl(probe->lock);
134 assert(!pl.owns_lock());
142 * probe->lock must be initially locked, this function will release it
// Run one round of probing over the window
// [probe->probing_off, probe->probing_off + probe->probing_len):
// map the window onto object extents and issue one stat per extent.
// The caller must pass in `pl` holding probe->lock (asserted on entry).
144 void Filer::_probe(Probe *probe, Probe::unique_lock& pl)
146 assert(pl.owns_lock() && pl.mutex() == &probe->lock);
148 ldout(cct, 10) << "_probe " << hex << probe->ino << dec
149 << " " << probe->probing_off << "~" << probe->probing_len
152 // map range onto objects
// Drop the results and extents of any previous round first.
153 probe->known_size.clear();
154 probe->probing.clear();
155 Striper::file_to_extents(cct, probe->ino, &probe->layout, probe->probing_off,
156 probe->probing_len, 0, probe->probing);
// Record every object of this round in probe->ops (the set of outstanding
// requests, drained by _probed()) and take a private copy of each extent, so
// that the stat loop below does not iterate over probe->probing itself.
158 std::vector<ObjectExtent> stat_extents;
159 for (vector<ObjectExtent>::iterator p = probe->probing.begin();
160 p != probe->probing.end();
162 ldout(cct, 10) << "_probe probing " << p->oid << dendl;
163 probe->ops.insert(p->oid);
164 stat_extents.push_back(*p);
// One stat per extent.  The C_Probe completion provides the size/mtime
// output slots and is wrapped in a C_OnFinisher together with `finisher`.
168 for (std::vector<ObjectExtent>::iterator i = stat_extents.begin();
169 i != stat_extents.end(); ++i) {
170 C_Probe *c = new C_Probe(this, probe, i->oid);
171 objecter->stat(i->oid, i->oloc, probe->snapid, &c->size, &c->mtime,
172 probe->flags | CEPH_OSD_FLAG_RWORDERED,
173 new C_OnFinisher(c, finisher));
178 * probe->lock must be initially held, and will be released by this function.
180 * @return true if probe is complete and Probe object may be freed.
// Handle the result of one per-object request of the current probing round.
// Records the object's size, tracks the newest mtime and removes the object
// from the outstanding set.  Once the round is complete it walks the probed
// extents to locate the end of the file and decides whether another round is
// needed or the result can be reported.
// The caller must pass in `pl` holding probe->lock (asserted on entry).
182 bool Filer::_probed(Probe *probe, const object_t& oid, uint64_t size,
183 ceph::real_time mtime, Probe::unique_lock& pl)
185 assert(pl.owns_lock() && pl.mutex() == &probe->lock);
187 ldout(cct, 10) << "_probed " << probe->ino << " object " << oid
188 << " has size " << size << " mtime " << mtime << dendl;
// Record this object's size and keep the largest mtime seen so far.
190 probe->known_size[oid] = size;
191 if (mtime > probe->max_mtime)
192 probe->max_mtime = mtime;
// The object must be one we are actually waiting for; it is done now.
194 assert(probe->ops.count(oid));
195 probe->ops.erase(oid);
// Other requests of this round are still outstanding.
197 if (!probe->ops.empty()) {
199 return false; // waiting for more!
202 if (probe->err) { // we hit an error, propagate back up
// Flip the extent order before the scan below.
// NOTE(review): presumably done only for backward probes, so that the scan
// runs from the highest offset downwards -- the guarding condition should be
// confirmed.
211 std::reverse(probe->probing.begin(), probe->probing.end());
// Compare each probed object's actual size against the size it would have if
// the file covered the whole extent.
214 for (vector<ObjectExtent>::iterator p = probe->probing.begin();
215 p != probe->probing.end();
// Object size implied by the extent: its offset within the object plus its
// length.
217 uint64_t shouldbe = p->length + p->offset;
218 ldout(cct, 10) << "_probed " << probe->ino << " object " << hex
219 << p->oid << dec << " should be " << shouldbe
220 << ", actual is " << probe->known_size[p->oid]
223 if (!probe->found_size) {
224 assert(probe->known_size[p->oid] <= shouldbe);
// Forward probe: an object filled up to the extent's end means the file
// continues past it.  Backward probe: an empty object means the end lies
// further down, as long as the window has not reached offset 0.
226 if ((probe->fwd && probe->known_size[p->oid] == shouldbe) ||
227 (!probe->fwd && probe->known_size[p->oid] == 0 &&
228 probe->probing_off > 0))
229 continue; // keep going
231 // aha, we found the end!
232 // calc offset into buffer_extent to get distance from probe->from.
// Bytes of the object's data that lie beyond the start of this extent.
233 uint64_t oleft = probe->known_size[p->oid] - p->offset;
// Find the buffer extent (first = offset, second = length, as logged below)
// that contains the end and translate it into a file offset relative to
// probing_off.
234 for (vector<pair<uint64_t, uint64_t> >::iterator i
235 = p->buffer_extents.begin();
236 i != p->buffer_extents.end();
238 if (oleft <= (uint64_t)i->second) {
239 end = probe->probing_off + i->first + oleft;
240 ldout(cct, 10) << "_probed end is in buffer_extent " << i->first
241 << "~" << i->second << " off " << oleft
242 << ", from was " << probe->probing_off << ", end is "
245 probe->found_size = true;
246 ldout(cct, 10) << "_probed found size at " << end << dendl;
249 if (!probe->pmtime &&
250 !probe->pumtime) // stop if we don't need mtime too
// Another round is needed when the size is still unknown, or when the window
// has not reached offset 0 and the remaining (continued) condition involving
// the mtime out-parameters holds.
259 if (!probe->found_size || (probe->probing_off && (probe->pmtime ||
262 ldout(cct, 10) << "_probed probing further" << dendl;
264 uint64_t period = probe->layout.get_period();
// Advance the window to the next period; the new start must be
// period-aligned.
266 probe->probing_off += probe->probing_len;
267 assert(probe->probing_off % period == 0);
268 probe->probing_len = period;
// Move the window one period towards the start of the file; the current
// start must be period-aligned.
271 assert(probe->probing_off % period == 0);
272 probe->probing_len = period;
273 probe->probing_off -= period;
// The lock must have been released by this point.
276 assert(!pl.owns_lock());
// No further round: publish the newest mtime through whichever mtime
// out-parameter was supplied.
278 } else if (probe->pmtime) {
279 ldout(cct, 10) << "_probed found mtime " << probe->max_mtime << dendl;
280 *probe->pmtime = probe->max_mtime;
281 } else if (probe->pumtime) {
282 ldout(cct, 10) << "_probed found mtime " << probe->max_mtime << dendl;
// Same value, converted with ceph::real_clock::to_ceph_timespec().
283 *probe->pumtime = ceph::real_clock::to_ceph_timespec(probe->max_mtime);
291 // -----------------------
// State of one multi-object purge, allocated by Filer::purge_range() and
// driven by Filer::_do_purge_range().
// Lock helper aliases; _do_purge_range() locks this state through
// PurgeRange::unique_lock.
295 typedef std::lock_guard<std::mutex> lock_guard;
296 typedef std::unique_lock<std::mutex> unique_lock;
// File layout, used to derive the object locator of each removal.
298 file_layout_t layout;
// Modification time passed along with every removal.
301 ceph::real_time mtime;
// i: inode, l: layout (copied), sc: snap context, fo: first object number,
// no: number of objects still to remove, t: mtime, fl: op flags.  The
// completion callback (`fin`) ends up in `oncommit`.  `uncommitted`, the
// count of removals in flight, starts at 0.
305 PurgeRange(inodeno_t i, const file_layout_t& l, const SnapContext& sc,
306 uint64_t fo, uint64_t no, ceph::real_time t, int fl,
308 : ino(i), layout(l), snapc(sc), first(fo), num(no), mtime(t), flags(fl),
309 oncommit(fin), uncommitted(0) {}
// Remove objects of inode `ino`, starting at object number `first_obj` and
// covering `num_obj` objects.  A single object is removed with one direct
// objecter->remove(); otherwise a PurgeRange is allocated and
// _do_purge_range() issues the removals in batches.  `oncommit` is passed to
// whichever path is taken.
312 int Filer::purge_range(inodeno_t ino,
313 const file_layout_t *layout,
314 const SnapContext& snapc,
315 uint64_t first_obj, uint64_t num_obj,
316 ceph::real_time mtime,
322 // single object? easy!
// Build the object name and locator for the one object and remove it.
324 object_t oid = file_object_t(ino, first_obj);
325 object_locator_t oloc = OSDMap::file_to_object_locator(*layout);
326 objecter->remove(oid, oloc, snapc, mtime, flags, oncommit);
// Multi-object case: the PurgeRange holds the progress state (with a copy of
// *layout).
330 PurgeRange *pr = new PurgeRange(ino, *layout, snapc, first_obj,
331 num_obj, mtime, flags, oncommit);
// fin == 0: this initial call does not account for a finished removal.
333 _do_purge_range(pr, 0);
// Completion for a single removal issued by Filer::_do_purge_range(): it
// re-enters _do_purge_range() with fin == 1, which accounts for the finished
// removal.  The result code r is not looked at here.
337 struct C_PurgeRange : public Context {
340 C_PurgeRange(Filer *f, PurgeRange *p) : filer(f), pr(p) {}
341 void finish(int r) override {
342 filer->_do_purge_range(pr, 1);
// Drive a multi-object purge forward.
// `fin` is the number of removals that have just finished: 0 for the initial
// call from purge_range(), 1 when called from C_PurgeRange::finish().
// Completes pr->oncommit with 0 once no objects are left and none are in
// flight; otherwise issues further removals, limited by the
// filer_max_purge_ops configuration value.
346 void Filer::_do_purge_range(PurgeRange *pr, int fin)
// pr->lock protects the progress counters.
348 PurgeRange::unique_lock prl(pr->lock);
349 pr->uncommitted -= fin;
350 ldout(cct, 10) << "_do_purge_range " << pr->ino << " objects " << pr->first
351 << "~" << pr->num << " uncommitted " << pr->uncommitted
// Everything has been issued and everything issued has finished: done.
354 if (pr->num == 0 && pr->uncommitted == 0) {
355 pr->oncommit->complete(0);
// Names of the objects to remove in this batch.
361 std::vector<object_t> remove_oids;
// Number of additional removals allowed right now.
363 int max = cct->_conf->filer_max_purge_ops - pr->uncommitted;
364 while (pr->num > 0 && max > 0) {
365 remove_oids.push_back(file_object_t(pr->ino, pr->first));
373 // Issue objecter ops outside pr->lock to avoid lock dependency loop
// Each removal gets its own C_PurgeRange completion, wrapped in a
// C_OnFinisher together with `finisher`.
374 for (const auto& oid : remove_oids) {
375 object_locator_t oloc = OSDMap::file_to_object_locator(pr->layout);
376 objecter->remove(oid, oloc, pr->snapc, pr->mtime, pr->flags,
377 new C_OnFinisher(new C_PurgeRange(this, pr), finisher));
381 // -----------------------
// State of one multi-object truncate, allocated by Filer::truncate() and
// driven by Filer::_do_truncate_range().
// Lock helper aliases; _do_truncate_range() locks this state through
// TruncRange::unique_lock.
384 typedef std::lock_guard<std::mutex> lock_guard;
385 typedef std::unique_lock<std::mutex> unique_lock;
// File layout, used to map the byte range onto object extents.
387 file_layout_t layout;
// Modification time passed along with every truncate op.
389 ceph::real_time mtime;
// Truncate sequence number stamped on every TRIMTRUNC op.
395 uint32_t truncate_seq;
// i: inode, l: layout (copied), sc: snap context, t: mtime, fl: op flags,
// fin: completion callback, off/len: byte range still to be processed,
// ts: truncate sequence.  `uncommitted`, the count of ops in flight, starts
// at 0.
396 TruncRange(inodeno_t i, const file_layout_t& l, const SnapContext& sc,
397 ceph::real_time t, int fl, Context *fin,
398 uint64_t off, uint64_t len, uint32_t ts)
399 : ino(i), layout(l), snapc(sc), mtime(t), flags(fl), oncommit(fin),
400 uncommitted(0), offset(off), length(len), truncate_seq(ts) {}
// Truncate the byte range [offset, offset + len) of inode `ino` by sending
// CEPH_OSD_OP_TRIMTRUNC ops to the objects backing it.  One path submits a
// single op directly; the other allocates a TruncRange and lets
// _do_truncate_range() issue the ops in batches.
403 void Filer::truncate(inodeno_t ino,
404 file_layout_t *layout,
405 const SnapContext& snapc,
409 ceph::real_time mtime,
413 uint64_t period = layout->get_period();
// Object count reported by Striper::get_num_objects() for `len` bytes plus
// the offset's position within its period.
414 uint64_t num_objs = Striper::get_num_objects(*layout, len + (offset % period));
// Direct path: map the range to extents and send one TRIMTRUNC op to the
// first extent's object, truncating it at that extent's in-object offset.
// NOTE(review): extents[0] is indexed without an emptiness check -- confirm
// this path is never reached with a range that maps to no extents.
416 vector<ObjectExtent> extents;
417 Striper::file_to_extents(cct, ino, layout, offset, len, 0, extents);
418 vector<OSDOp> ops(1);
419 ops[0].op.op = CEPH_OSD_OP_TRIMTRUNC;
420 ops[0].op.extent.truncate_seq = truncate_seq;
421 ops[0].op.extent.truncate_size = extents[0].offset;
422 objecter->_modify(extents[0].oid, extents[0].oloc, ops, mtime, snapc,
// Batched path: round `len` up so that the range ends on a period boundary.
427 if (len > 0 && (offset + len) % period)
428 len += period - ((offset + len) % period);
// The TruncRange holds the progress state (with a copy of *layout).
430 TruncRange *tr = new TruncRange(ino, *layout, snapc, mtime, flags, oncommit,
431 offset, len, truncate_seq);
// fin == 0: this initial call does not account for a finished op.
432 _do_truncate_range(tr, 0);
// Completion for a single truncate op issued by Filer::_do_truncate_range():
// it re-enters _do_truncate_range() with fin == 1, which accounts for the
// finished op.  The result code r is not looked at here.
435 struct C_TruncRange : public Context {
438 C_TruncRange(Filer *f, TruncRange *t) : filer(f), tr(t) {}
439 void finish(int r) override {
440 filer->_do_truncate_range(tr, 1);
// Drive a multi-object truncate forward.
// `fin` is the number of ops that have just finished: 0 for the initial call
// from truncate(), 1 when called from C_TruncRange::finish().
// Completes tr->oncommit with 0 once no range is left and no ops are in
// flight; otherwise issues further TRIMTRUNC ops, limited by the
// filer_max_truncate_ops configuration value.
444 void Filer::_do_truncate_range(TruncRange *tr, int fin)
// tr->lock protects the progress counters.
446 TruncRange::unique_lock trl(tr->lock);
447 tr->uncommitted -= fin;
448 ldout(cct, 10) << "_do_truncate_range " << tr->ino << " objects " << tr->offset
449 << "~" << tr->length << " uncommitted " << tr->uncommitted
// Nothing left to issue and nothing in flight: done.
452 if (tr->length == 0 && tr->uncommitted == 0) {
453 tr->oncommit->complete(0);
// Extents to truncate in this batch.
459 vector<ObjectExtent> extents;
// Number of additional ops allowed right now.
461 int max = cct->_conf->filer_max_truncate_ops - tr->uncommitted;
462 if (max > 0 && tr->length > 0) {
// Size the batch at `max` periods' worth of bytes; the comparison below is
// against what is left of the range (presumably to clamp the batch to it).
463 uint64_t len = tr->layout.get_period() * max;
464 if (len > tr->length)
// The batch is taken from the tail of the remaining range.
467 uint64_t offset = tr->offset + tr->length - len;
468 Striper::file_to_extents(cct, tr->ino, &tr->layout, offset, len, 0, extents);
// One op will be issued per extent.
469 tr->uncommitted += extents.size();
475 // Issue objecter ops outside tr->lock to avoid lock dependency loop
// Each extent gets one TRIMTRUNC op that truncates its object at the extent's
// in-object offset.  The C_TruncRange completion is wrapped in a C_OnFinisher
// together with `finisher`.
476 for (const auto& p : extents) {
477 vector<OSDOp> ops(1);
478 ops[0].op.op = CEPH_OSD_OP_TRIMTRUNC;
479 ops[0].op.extent.truncate_size = p.offset;
480 ops[0].op.extent.truncate_seq = tr->truncate_seq;
481 objecter->_modify(p.oid, p.oloc, ops, tr->mtime, tr->snapc, tr->flags,
482 new C_OnFinisher(new C_TruncRange(this, tr), finisher));