NetSurf
bloom.c
Go to the documentation of this file.
1/*
2 * Copyright 2013 Rob Kendrick <rjek@netsurf-browser.org>
3 *
4 * This file is part of NetSurf, http://www.netsurf-browser.org/
5 *
6 * NetSurf is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; version 2 of the License.
9 *
10 * NetSurf is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program. If not, see <http://www.gnu.org/licenses/>.
17 */
18
19/**
20 * \file
21 * Trivial bloom filter
22 */
23
24#include <stdlib.h>
25#include "utils/bloom.h"
26#include "utils/utils.h"
27
28/**
29 * Hash a string, returning a 32bit value. The hash algorithm used is
30 * Fowler Noll Vo - a very fast and simple hash, ideal for short strings.
31 * See http://en.wikipedia.org/wiki/Fowler_Noll_Vo_hash for more details.
32 *
33 * \param datum The string to hash.
34 * \param len size_t of data length.
35 * \return The calculated hash value for the datum.
36 */
37
38static inline uint32_t fnv(const char *datum, size_t len)
39{
40 uint32_t z = 0x811c9dc5;
41
42 if (datum == NULL)
43 return 0;
44
45 while (len--) {
46 z *= 0x01000193;
47 z ^= *datum++;
48 }
49
50 return z;
51}
52
54 size_t size;
55 uint32_t items;
57};
58
60{
61 struct bloom_filter *r = calloc(sizeof(*r) + size, 1);
62
63 if (r == NULL)
64 return NULL;
65
66 r->size = size;
67
68 return r;
69}
70
72{
73 free(b);
74}
75
76void bloom_insert_str(struct bloom_filter *b, const char *s, size_t z)
77{
78 uint32_t hash = fnv(s, z);
79 bloom_insert_hash(b, hash);
80}
81
82void bloom_insert_hash(struct bloom_filter *b, uint32_t hash)
83{
84 unsigned int index = hash % (b->size << 3);
85 unsigned int byte_index = index >> 3;
86 unsigned int bit_index = index & 7;
87
88 b->filter[byte_index] |= (1 << bit_index);
89 b->items++;
90}
91
92bool bloom_search_str(struct bloom_filter *b, const char *s, size_t z)
93{
94 uint32_t hash = fnv(s, z);
95 return bloom_search_hash(b, hash);
96}
97
98bool bloom_search_hash(struct bloom_filter *b, uint32_t hash)
99{
100 unsigned int index = hash % (b->size << 3);
101 unsigned int byte_index = index >> 3;
102 unsigned int bit_index = index & 7;
103
104 return (b->filter[byte_index] & (1 << bit_index)) != 0;
105}
106
107uint32_t bloom_items(struct bloom_filter *b)
108{
109 return b->items;
110}
111
uint32_t bloom_items(struct bloom_filter *b)
Find out how many items have been added to this bloom filter.
Definition: bloom.c:107
struct bloom_filter * bloom_create(size_t size)
Create a new bloom filter.
Definition: bloom.c:59
void bloom_destroy(struct bloom_filter *b)
Destroy a previously-created bloom filter.
Definition: bloom.c:71
bool bloom_search_str(struct bloom_filter *b, const char *s, size_t z)
Search the filter for the given string, assuming it was added by bloom_insert_str().
Definition: bloom.c:92
void bloom_insert_str(struct bloom_filter *b, const char *s, size_t z)
Insert a string of given length (may include NULs) into the filter, using an internal hash function.
Definition: bloom.c:76
bool bloom_search_hash(struct bloom_filter *b, uint32_t hash)
Search the filter for the given hash value, assuming it was added by bloom_insert_hash().
Definition: bloom.c:98
static uint32_t fnv(const char *datum, size_t len)
Hash a string, returning a 32bit value.
Definition: bloom.c:38
void bloom_insert_hash(struct bloom_filter *b, uint32_t hash)
Insert a given hash value into the filter, should you already have one to hand.
Definition: bloom.c:82
Trivial bloom filter.
uint32_t items
Definition: bloom.c:55
uint8_t filter[FLEX_ARRAY_LEN_DECL]
Definition: bloom.c:56
size_t size
Definition: bloom.c:54
Interface to a number of general purpose functionality.
#define FLEX_ARRAY_LEN_DECL
Definition: utils.h:66