1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
4 * Ceph - scalable distributed file system
6 * Copyright (C) 2013 Inktank <info@inktank.com>
8 * LGPL2.1 (see COPYING-LGPL2.1) or later
12 #include <gtest/gtest.h>
14 #include "include/stringify.h"
15 #include "common/bloom_filter.hpp"
17 TEST(BloomFilter, Basic) {
18 bloom_filter bf(10, .1, 1);
22 ASSERT_TRUE(bf.contains("foo"));
23 ASSERT_TRUE(bf.contains("bar"));
25 ASSERT_EQ(2U, bf.element_count());
28 TEST(BloomFilter, Empty) {
30 for (int i=0; i<100; ++i) {
31 ASSERT_FALSE(bf.contains(i));
32 ASSERT_FALSE(bf.contains(stringify(i)));
36 TEST(BloomFilter, Sweep) {
37 std::cout.setf(std::ios_base::fixed, std::ios_base::floatfield);
38 std::cout.precision(5);
39 std::cout << "# max\tfpp\tactual\tsize\tB/insert" << std::endl;
40 for (int ex = 3; ex < 12; ex += 2) {
41 for (float fpp = .001; fpp < .5; fpp *= 4.0) {
43 bloom_filter bf(max, fpp, 1);
47 ASSERT_TRUE(bf.contains("foo"));
48 ASSERT_TRUE(bf.contains("bar"));
50 for (int n = 0; n < max; n++)
51 bf.insert("ok" + stringify(n));
55 for (int n = 0; n < test; n++)
56 if (bf.contains("asdf" + stringify(n)))
59 ASSERT_TRUE(bf.contains("foo"));
60 ASSERT_TRUE(bf.contains("bar"));
62 double actual = (double)hit / (double)test;
67 double byte_per_insert = (double)bl.length() / (double)max;
69 std::cout << max << "\t" << fpp << "\t" << actual << "\t" << bl.length() << "\t" << byte_per_insert << std::endl;
70 ASSERT_TRUE(actual < fpp * 10);
76 TEST(BloomFilter, SweepInt) {
77 std::cout.setf(std::ios_base::fixed, std::ios_base::floatfield);
78 std::cout.precision(5);
79 std::cout << "# max\tfpp\tactual\tsize\tB/insert\tdensity\tapprox_element_count" << std::endl;
80 for (int ex = 3; ex < 12; ex += 2) {
81 for (float fpp = .001; fpp < .5; fpp *= 4.0) {
83 bloom_filter bf(max, fpp, 1);
90 for (int n = 0; n < max; n++)
95 for (int n = 0; n < test; n++)
96 if (bf.contains(100000 + n))
102 double actual = (double)hit / (double)test;
107 double byte_per_insert = (double)bl.length() / (double)max;
109 std::cout << max << "\t" << fpp << "\t" << actual << "\t" << bl.length() << "\t" << byte_per_insert
110 << "\t" << bf.density() << "\t" << bf.approx_unique_element_count() << std::endl;
111 ASSERT_TRUE(actual < fpp * 10);
112 ASSERT_TRUE(actual > fpp / 10);
113 ASSERT_TRUE(bf.density() > 0.40);
114 ASSERT_TRUE(bf.density() < 0.60);
120 TEST(BloomFilter, CompressibleSweep) {
121 std::cout.setf(std::ios_base::fixed, std::ios_base::floatfield);
122 std::cout.precision(5);
123 std::cout << "# max\tins\test ins\tafter\ttgtfpp\tactual\tsize\tb/elem\n";
126 for (int div = 1; div < 10; div++) {
127 compressible_bloom_filter bf(max, fpp, 1);
129 for (int n = 0; n < t; n++)
132 unsigned est = bf.approx_unique_element_count();
134 bf.compress(1.0 / div);
136 for (int n = 0; n < t; n++)
137 ASSERT_TRUE(bf.contains(n));
139 int test = max * 100;
141 for (int n = 0; n < test; n++)
142 if (bf.contains(100000 + n))
145 double actual = (double)hit / (double)test;
150 double byte_per_insert = (double)bl.length() / (double)max;
151 unsigned est_after = bf.approx_unique_element_count();
158 << "\t" << bl.length() << "\t" << byte_per_insert
161 ASSERT_TRUE(actual < fpp * 2.0);
162 ASSERT_TRUE(actual > fpp / 2.0);
163 ASSERT_TRUE(est_after < est * 2);
164 ASSERT_TRUE(est_after > est / 2);
170 TEST(BloomFilter, BinSweep) {
171 std::cout.setf(std::ios_base::fixed, std::ios_base::floatfield);
172 std::cout.precision(5);
173 int total_max = 16384;
174 float total_fpp = .01;
175 std::cout << "total_inserts " << total_max << " target-fpp " << total_fpp << std::endl;
176 for (int bins = 1; bins < 16; ++bins) {
177 int max = total_max / bins;
178 float fpp = total_fpp / bins;//pow(total_fpp, bins);
180 std::vector<bloom_filter*> ls;
182 for (int i=0; i<bins; i++) {
183 ls.push_back(new bloom_filter(max, fpp, i));
184 for (int j=0; j<max; j++) {
185 ls.back()->insert(10000 * (i+1) + j);
187 ::encode(*ls.front(), bl);
191 int test = max * 100;
192 for (int i=0; i<test; ++i) {
193 for (std::vector<bloom_filter*>::iterator j = ls.begin(); j != ls.end(); ++j) {
194 if ((*j)->contains(i * 732)) { // note: sequential i does not work here; the intenral int hash is weak!!
201 double actual = (double)hit / (double)test;
202 std::cout << "bins " << bins << " bin-max " << max << " bin-fpp " << fpp
203 << " actual-fpp " << actual
204 << " total-size " << bl.length() << std::endl;
208 // disable these tests; doing dual insertions in consecutive filters
209 // appears to be equivalent to doing a single insertion in a bloom
210 // filter that is twice as big.
213 // test the fpp over a sequence of bloom filters, each with unique
214 // items inserted into it.
216 // we expect: actual_fpp = num_filters * per_filter_fpp
217 TEST(BloomFilter, Sequence) {
221 for (int seq = 2; seq <= 128; seq *= 2) {
222 std::vector<bloom_filter*> ls;
223 for (int i=0; i<seq; i++) {
224 ls.push_back(new bloom_filter(max*2, fpp, i));
225 for (int j=0; j<max; j++) {
226 ls.back()->insert("ok" + stringify(j) + "_" + stringify(i));
228 ls[ls.size() - 2]->insert("ok" + stringify(j) + "_" + stringify(i));
233 int test = max * 100;
234 for (int i=0; i<test; ++i) {
235 for (std::vector<bloom_filter*>::iterator j = ls.begin(); j != ls.end(); ++j) {
236 if ((*j)->contains("bad" + stringify(i))) {
243 double actual = (double)hit / (double)test;
244 std::cout << "seq " << seq << " max " << max << " fpp " << fpp << " actual " << actual << std::endl;
248 // test the ffp over a sequence of bloom filters, where actual values
249 // are always inserted into a consecutive pair of filters. in order
250 // to have a false positive, we need to falsely match two consecutive
253 // we expect: actual_fpp = num_filters * per_filter_fpp^2
254 TEST(BloomFilter, SequenceDouble) {
257 for (int seq = 2; seq <= 128; seq *= 2) {
258 std::vector<bloom_filter*> ls;
259 for (int i=0; i<seq; i++) {
260 ls.push_back(new bloom_filter(max*2, fpp, i));
261 for (int j=0; j<max; j++) {
262 ls.back()->insert("ok" + stringify(j) + "_" + stringify(i));
264 ls[ls.size() - 2]->insert("ok" + stringify(j) + "_" + stringify(i));
269 int test = max * 100;
271 for (int i=0; i<test; ++i) {
272 for (std::vector<bloom_filter*>::iterator j = ls.begin(); j != ls.end(); ++j) {
273 if ((*j)->contains("bad" + stringify(i))) {
285 double actual = (double)hit / (double)test;
286 std::cout << "seq " << seq << " max " << max << " fpp " << fpp << " actual " << actual
287 << " expected " << (fpp*fpp*(double)seq) << std::endl;
293 TEST(BloomFilter, Assignement) {
294 bloom_filter bf1(10, .1, 1), bf2;
300 ASSERT_TRUE(bf2.contains("foo"));
301 ASSERT_FALSE(bf2.contains("bar"));
303 ASSERT_EQ(2U, bf1.element_count());
304 ASSERT_EQ(1U, bf2.element_count());