| #ifndef BLOOM_H |
| #define BLOOM_H |
| |
| struct commit; |
| struct repository; |
| struct commit_graph; |
| |
| struct bloom_filter_settings { |
| /* |
| * The version of the hashing technique being used. |
| * The newest version is 2, which is |
| * the seeded murmur3 hashing technique implemented |
| * in bloom.c. Bloom filters of version 1 were created |
| * with prior versions of Git, which had a bug in the |
| * implementation of the hash function. |
| */ |
| uint32_t hash_version; |
| |
| /* |
| * The number of times a path is hashed, i.e. the |
| * number of bit positions that cumulatively |
| * determine whether a path is present in the |
| * Bloom filter. |
| */ |
| uint32_t num_hashes; |
| |
| /* |
| * The minimum number of bits per entry in the Bloom |
| * filter. If the filter contains 'n' entries, then |
| * filter size is the minimum number of 8-bit words |
| * that contain n*b bits. |
| */ |
| uint32_t bits_per_entry; |
| |
| /* |
| * The maximum number of changed paths per commit |
| * before declaring a Bloom filter to be too-large. |
| * |
| * Not written to the commit-graph file. |
| */ |
| uint32_t max_changed_paths; |
| }; |
| |
| #define DEFAULT_BLOOM_MAX_CHANGES 512 |
| #define DEFAULT_BLOOM_FILTER_SETTINGS { 1, 7, 10, DEFAULT_BLOOM_MAX_CHANGES } |
| #define BITS_PER_WORD 8 |
| #define BLOOMDATA_CHUNK_HEADER_SIZE 3 * sizeof(uint32_t) |
| |
| /* |
| * A bloom_filter struct represents a data segment to |
| * use when testing hash values. The 'len' member |
| * dictates how many entries are stored in |
| * 'data'. |
| */ |
| struct bloom_filter { |
| unsigned char *data; |
| size_t len; |
| int version; |
| |
| void *to_free; |
| }; |
| |
| /* |
| * A bloom_key represents the k hash values for a |
| * given string. These can be precomputed and |
| * stored in a bloom_key for re-use when testing |
| * against a bloom_filter. The number of hashes is |
| * given by the Bloom filter settings and is the same |
| * for all Bloom filters and keys interacting with |
| * the loaded version of the commit graph file and |
| * the Bloom data chunks. |
| */ |
| struct bloom_key { |
| uint32_t *hashes; |
| }; |
| |
| /* |
| * A bloom_keyvec is a vector of bloom_keys, which |
| * can be used to store multiple keys for a single |
| * pathspec item. |
| */ |
| struct bloom_keyvec { |
| size_t count; |
| struct bloom_key key[FLEX_ARRAY]; |
| }; |
| |
| int load_bloom_filter_from_graph(struct commit_graph *g, |
| struct bloom_filter *filter, |
| uint32_t graph_pos); |
| |
| void bloom_key_fill(struct bloom_key *key, const char *data, size_t len, |
| const struct bloom_filter_settings *settings); |
| void bloom_key_clear(struct bloom_key *key); |
| |
| /* |
| * bloom_keyvec_new - Allocate and populate a bloom_keyvec with keys for the |
| * given path. |
| * |
| * This function splits the input path by '/' and generates a bloom key for each |
| * prefix, in reverse order of specificity. For example, given the input |
| * "a/b/c", it will generate bloom keys for: |
| * - "a/b/c" |
| * - "a/b" |
| * - "a" |
| * |
| * The resulting keys are stored in a newly allocated bloom_keyvec. |
| */ |
| struct bloom_keyvec *bloom_keyvec_new(const char *path, size_t len, |
| const struct bloom_filter_settings *settings); |
| void bloom_keyvec_free(struct bloom_keyvec *vec); |
| |
| void add_key_to_filter(const struct bloom_key *key, |
| struct bloom_filter *filter, |
| const struct bloom_filter_settings *settings); |
| |
| void init_bloom_filters(void); |
| void deinit_bloom_filters(void); |
| |
| enum bloom_filter_computed { |
| BLOOM_NOT_COMPUTED = (1 << 0), |
| BLOOM_COMPUTED = (1 << 1), |
| BLOOM_TRUNC_LARGE = (1 << 2), |
| BLOOM_TRUNC_EMPTY = (1 << 3), |
| BLOOM_UPGRADED = (1 << 4), |
| }; |
| |
| struct bloom_filter *get_or_compute_bloom_filter(struct repository *r, |
| struct commit *c, |
| int compute_if_not_present, |
| const struct bloom_filter_settings *settings, |
| enum bloom_filter_computed *computed); |
| |
| /* |
| * Find the Bloom filter associated with the given commit "c". |
| * |
| * If any of the following are true |
| * |
| * - the repository does not have a commit-graph, or |
| * - the repository disables reading from the commit-graph, or |
| * - the given commit does not have a Bloom filter computed, or |
| * - there is a Bloom filter for commit "c", but it cannot be read |
| * because the filter uses an incompatible version of murmur3 |
| * |
| * , then `get_bloom_filter()` will return NULL. Otherwise, the corresponding |
| * Bloom filter will be returned. |
| * |
| * For callers who wish to inspect Bloom filters with incompatible hash |
| * versions, use get_or_compute_bloom_filter(). |
| */ |
| struct bloom_filter *get_bloom_filter(struct repository *r, struct commit *c); |
| |
| int bloom_filter_contains(const struct bloom_filter *filter, |
| const struct bloom_key *key, |
| const struct bloom_filter_settings *settings); |
| |
| /* |
| * bloom_filter_contains_vec - Check if all keys in a key vector are in the |
| * Bloom filter. |
| * |
| * Returns 1 if **all** keys in the vector are present in the filter, |
| * 0 if **any** key is not present. |
| */ |
| int bloom_filter_contains_vec(const struct bloom_filter *filter, |
| const struct bloom_keyvec *v, |
| const struct bloom_filter_settings *settings); |
| |
| uint32_t test_bloom_murmur3_seeded(uint32_t seed, const char *data, size_t len, |
| int version); |
| |
| #endif |