1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
4 * Ceph distributed storage system
6 * Copyright (C) 2013 Cloudwatt <libre.licensing@cloudwatt.com>
8 * Author: Loic Dachary <loic@dachary.org>
10 * This library is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU Lesser General Public
12 * License as published by the Free Software Foundation; either
13 * version 2.1 of the License, or (at your option) any later version.
17 #ifndef CEPH_ERASURE_CODE_EXAMPLE_H
18 #define CEPH_ERASURE_CODE_EXAMPLE_H
25 #include "crush/CrushWrapper.h"
26 #include "osd/osd_types.h"
27 #include "erasure-code/ErasureCode.h"
29 #define FIRST_DATA_CHUNK 0
30 #define SECOND_DATA_CHUNK 1
31 #define DATA_CHUNKS 2u
33 #define CODING_CHUNK 2
34 #define CODING_CHUNKS 1u
36 #define MINIMUM_TO_RECOVER 2u
38 class ErasureCodeExample : public ErasureCode {
40 ~ErasureCodeExample() override {}
42 int create_rule(const string &name,
44 ostream *ss) const override {
45 return crush.add_simple_rule(name, "default", "host", "",
46 "indep", pg_pool_t::TYPE_ERASURE, ss);
49 int minimum_to_decode(const set<int> &want_to_read,
50 const set<int> &available_chunks,
51 set<int> *minimum) override {
52 if (includes(available_chunks.begin(), available_chunks.end(),
53 want_to_read.begin(), want_to_read.end())) {
54 *minimum = want_to_read;
56 } else if (available_chunks.size() >= MINIMUM_TO_RECOVER) {
57 *minimum = available_chunks;
64 int minimum_to_decode_with_cost(const set<int> &want_to_read,
65 const map<int, int> &available,
66 set<int> *minimum) override {
68 // If one chunk is more expensive to fetch than the others,
69 // recover it instead. For instance, if the cost reflects the
70 // time it takes for a chunk to be retrieved from a remote
71 // OSD and if CPU is cheap, it could make sense to recover
72 // instead of fetching the chunk.
74 map<int, int> c2c(available);
75 if (c2c.size() > DATA_CHUNKS) {
76 if (c2c[FIRST_DATA_CHUNK] > c2c[SECOND_DATA_CHUNK] &&
77 c2c[FIRST_DATA_CHUNK] > c2c[CODING_CHUNK])
78 c2c.erase(FIRST_DATA_CHUNK);
79 else if(c2c[SECOND_DATA_CHUNK] > c2c[FIRST_DATA_CHUNK] &&
80 c2c[SECOND_DATA_CHUNK] > c2c[CODING_CHUNK])
81 c2c.erase(SECOND_DATA_CHUNK);
82 else if(c2c[CODING_CHUNK] > c2c[FIRST_DATA_CHUNK] &&
83 c2c[CODING_CHUNK] > c2c[SECOND_DATA_CHUNK])
84 c2c.erase(CODING_CHUNK);
86 set <int> available_chunks;
87 for (map<int, int>::const_iterator i = c2c.begin();
90 available_chunks.insert(i->first);
91 return minimum_to_decode(want_to_read, available_chunks, minimum);
94 unsigned int get_chunk_count() const override {
95 return DATA_CHUNKS + CODING_CHUNKS;
98 unsigned int get_data_chunk_count() const override {
102 unsigned int get_chunk_size(unsigned int object_size) const override {
103 return ( object_size / DATA_CHUNKS ) + 1;
106 int encode(const set<int> &want_to_encode,
107 const bufferlist &in,
108 map<int, bufferlist> *encoded) override {
110 // make sure all data chunks have the same length, allocating
111 // padding if necessary.
113 unsigned int chunk_length = get_chunk_size(in.length());
115 unsigned int width = get_chunk_count() * get_chunk_size(in.length());
116 bufferptr pad(width - in.length());
117 pad.zero(0, get_data_chunk_count());
120 // compute the coding chunk with first chunk ^ second chunk
122 char *p = out.c_str();
123 for (unsigned i = 0; i < chunk_length; i++)
124 p[i + CODING_CHUNK * chunk_length] =
125 p[i + FIRST_DATA_CHUNK * chunk_length] ^
126 p[i + SECOND_DATA_CHUNK * chunk_length];
128 // populate the bufferlist with bufferptr pointing
129 // to chunk boundaries
131 const bufferptr &ptr = out.front();
132 for (set<int>::iterator j = want_to_encode.begin();
133 j != want_to_encode.end();
135 bufferptr chunk(ptr, (*j) * chunk_length, chunk_length);
136 (*encoded)[*j].push_front(chunk);
141 int encode_chunks(const set<int> &want_to_encode,
142 map<int, bufferlist> *encoded) override {
147 int decode(const set<int> &want_to_read,
148 const map<int, bufferlist> &chunks,
149 map<int, bufferlist> *decoded) override {
151 // All chunks have the same size
153 unsigned chunk_length = (*chunks.begin()).second.length();
154 for (set<int>::iterator i = want_to_read.begin();
155 i != want_to_read.end();
157 if (chunks.find(*i) != chunks.end()) {
159 // If the chunk is available, just copy the bufferptr pointer
160 // to the decoded argument.
162 (*decoded)[*i] = chunks.find(*i)->second;
163 } else if(chunks.size() != 2) {
165 // If a chunk is missing and there are not enough chunks
166 // to recover, abort.
171 // No matter what the missing chunk is, XOR of the other
174 map<int, bufferlist>::const_iterator k = chunks.begin();
175 const char *a = k->second.front().c_str();
177 const char *b = k->second.front().c_str();
178 bufferptr chunk(chunk_length);
179 char *c = chunk.c_str();
180 for (unsigned j = 0; j < chunk_length; j++) {
183 (*decoded)[*i].push_front(chunk);
189 int decode_chunks(const set<int> &want_to_read,
190 const map<int, bufferlist> &chunks,
191 map<int, bufferlist> *decoded) override {
196 const vector<int> &get_chunk_mapping() const override {
197 static vector<int> mapping;