dynarmic/include/tsl/robin_hash.h

1452 lines
52 KiB
C
Raw Normal View History

/**
* MIT License
*
* Copyright (c) 2017 Thibaut Goetghebuer-Planchon <tessil@gmx.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#ifndef TSL_ROBIN_HASH_H
#define TSL_ROBIN_HASH_H
#include <algorithm>
#include <cassert>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <exception>
#include <iterator>
#include <limits>
#include <memory>
#include <stdexcept>
#include <tuple>
#include <type_traits>
#include <utility>
#include <vector>
#include "robin_growth_policy.h"
namespace tsl {
namespace detail_robin_hash {
template<typename T>
struct make_void {
using type = void;
};
template<typename T, typename = void>
struct has_is_transparent: std::false_type {
};
template<typename T>
struct has_is_transparent<T, typename make_void<typename T::is_transparent>::type>: std::true_type {
};
template<typename U>
struct is_power_of_two_policy: std::false_type {
};
template<std::size_t GrowthFactor>
struct is_power_of_two_policy<tsl::rh::power_of_two_growth_policy<GrowthFactor>>: std::true_type {
};
// Only available in C++17, we need to be compatible with C++11
template<class T>
const T& clamp( const T& v, const T& lo, const T& hi) {
return std::min(hi, std::max(lo, v));
}
template<typename T, typename U>
static T numeric_cast(U value, const char* error_message = "numeric_cast() failed.") {
T ret = static_cast<T>(value);
if(static_cast<U>(ret) != value) {
TSL_RH_THROW_OR_TERMINATE(std::runtime_error, error_message);
}
const bool is_same_signedness = (std::is_unsigned<T>::value && std::is_unsigned<U>::value) ||
(std::is_signed<T>::value && std::is_signed<U>::value);
if(!is_same_signedness && (ret < T{}) != (value < U{})) {
TSL_RH_THROW_OR_TERMINATE(std::runtime_error, error_message);
}
return ret;
}
using truncated_hash_type = std::uint_least32_t;
/**
* Helper class that stores a truncated hash if StoreHash is true and nothing otherwise.
*/
template<bool StoreHash>
class bucket_entry_hash {
public:
bool bucket_hash_equal(std::size_t /*hash*/) const noexcept {
return true;
}
truncated_hash_type truncated_hash() const noexcept {
return 0;
}
protected:
void set_hash(truncated_hash_type /*hash*/) noexcept {
}
};
template<>
class bucket_entry_hash<true> {
public:
bool bucket_hash_equal(std::size_t hash) const noexcept {
return m_hash == truncated_hash_type(hash);
}
truncated_hash_type truncated_hash() const noexcept {
return m_hash;
}
protected:
void set_hash(truncated_hash_type hash) noexcept {
m_hash = truncated_hash_type(hash);
}
private:
truncated_hash_type m_hash;
};
/**
* Each bucket entry has:
* - A value of type `ValueType`.
* - An integer to store how far the value of the bucket, if any, is from its ideal bucket
* (ex: if the current bucket 5 has the value 'foo' and `hash('foo') % nb_buckets` == 3,
* `dist_from_ideal_bucket()` will return 2 as the current value of the bucket is two
* buckets away from its ideal bucket)
* If there is no value in the bucket (i.e. `empty()` is true) `dist_from_ideal_bucket()` will be < 0.
* - A marker which tells us if the bucket is the last bucket of the bucket array (useful for the
* iterator of the hash table).
* - If `StoreHash` is true, 32 bits of the hash of the value, if any, are also stored in the bucket.
* If the size of the hash is more than 32 bits, it is truncated. We don't store the full hash
* as storing the hash is a potential opportunity to use the unused space due to the alignment
* of the bucket_entry structure. We can thus potentially store the hash without any extra space
* (which would not be possible with 64 bits of the hash).
*/
template<typename ValueType, bool StoreHash>
class bucket_entry: public bucket_entry_hash<StoreHash> {
using bucket_hash = bucket_entry_hash<StoreHash>;
public:
using value_type = ValueType;
using distance_type = std::int_least16_t;
bucket_entry() noexcept: bucket_hash(), m_dist_from_ideal_bucket(EMPTY_MARKER_DIST_FROM_IDEAL_BUCKET),
m_last_bucket(false)
{
tsl_rh_assert(empty());
}
bucket_entry(bool last_bucket) noexcept: bucket_hash(), m_dist_from_ideal_bucket(EMPTY_MARKER_DIST_FROM_IDEAL_BUCKET),
m_last_bucket(last_bucket)
{
tsl_rh_assert(empty());
}
bucket_entry(const bucket_entry& other) noexcept(std::is_nothrow_copy_constructible<value_type>::value):
bucket_hash(other),
m_dist_from_ideal_bucket(EMPTY_MARKER_DIST_FROM_IDEAL_BUCKET),
m_last_bucket(other.m_last_bucket)
{
if(!other.empty()) {
::new (static_cast<void*>(std::addressof(m_value))) value_type(other.value());
m_dist_from_ideal_bucket = other.m_dist_from_ideal_bucket;
}
}
/**
* Never really used, but still necessary as we must call resize on an empty `std::vector<bucket_entry>`.
* and we need to support move-only types. See robin_hash constructor for details.
*/
bucket_entry(bucket_entry&& other) noexcept(std::is_nothrow_move_constructible<value_type>::value):
bucket_hash(std::move(other)),
m_dist_from_ideal_bucket(EMPTY_MARKER_DIST_FROM_IDEAL_BUCKET),
m_last_bucket(other.m_last_bucket)
{
if(!other.empty()) {
::new (static_cast<void*>(std::addressof(m_value))) value_type(std::move(other.value()));
m_dist_from_ideal_bucket = other.m_dist_from_ideal_bucket;
}
}
bucket_entry& operator=(const bucket_entry& other)
noexcept(std::is_nothrow_copy_constructible<value_type>::value)
{
if(this != &other) {
clear();
bucket_hash::operator=(other);
if(!other.empty()) {
::new (static_cast<void*>(std::addressof(m_value))) value_type(other.value());
}
m_dist_from_ideal_bucket = other.m_dist_from_ideal_bucket;
m_last_bucket = other.m_last_bucket;
}
return *this;
}
bucket_entry& operator=(bucket_entry&& ) = delete;
~bucket_entry() noexcept {
clear();
}
void clear() noexcept {
if(!empty()) {
destroy_value();
m_dist_from_ideal_bucket = EMPTY_MARKER_DIST_FROM_IDEAL_BUCKET;
}
}
bool empty() const noexcept {
return m_dist_from_ideal_bucket == EMPTY_MARKER_DIST_FROM_IDEAL_BUCKET;
}
value_type& value() noexcept {
tsl_rh_assert(!empty());
return *reinterpret_cast<value_type*>(std::addressof(m_value));
}
const value_type& value() const noexcept {
tsl_rh_assert(!empty());
return *reinterpret_cast<const value_type*>(std::addressof(m_value));
}
distance_type dist_from_ideal_bucket() const noexcept {
return m_dist_from_ideal_bucket;
}
bool last_bucket() const noexcept {
return m_last_bucket;
}
void set_as_last_bucket() noexcept {
m_last_bucket = true;
}
template<typename... Args>
void set_value_of_empty_bucket(distance_type dist_from_ideal_bucket,
truncated_hash_type hash, Args&&... value_type_args)
{
tsl_rh_assert(dist_from_ideal_bucket >= 0);
tsl_rh_assert(empty());
::new (static_cast<void*>(std::addressof(m_value))) value_type(std::forward<Args>(value_type_args)...);
this->set_hash(hash);
m_dist_from_ideal_bucket = dist_from_ideal_bucket;
tsl_rh_assert(!empty());
}
void swap_with_value_in_bucket(distance_type& dist_from_ideal_bucket,
truncated_hash_type& hash, value_type& value)
{
tsl_rh_assert(!empty());
using std::swap;
swap(value, this->value());
swap(dist_from_ideal_bucket, m_dist_from_ideal_bucket);
// Avoid warning of unused variable if StoreHash is false
(void) hash;
if(StoreHash) {
const truncated_hash_type tmp_hash = this->truncated_hash();
this->set_hash(hash);
hash = tmp_hash;
}
}
static truncated_hash_type truncate_hash(std::size_t hash) noexcept {
return truncated_hash_type(hash);
}
private:
void destroy_value() noexcept {
tsl_rh_assert(!empty());
value().~value_type();
}
public:
static const distance_type DIST_FROM_IDEAL_BUCKET_LIMIT = 4096;
static_assert(DIST_FROM_IDEAL_BUCKET_LIMIT <= std::numeric_limits<distance_type>::max() - 1,
"DIST_FROM_IDEAL_BUCKET_LIMIT must be <= std::numeric_limits<distance_type>::max() - 1.");
private:
using storage = typename std::aligned_storage<sizeof(value_type), alignof(value_type)>::type;
static const distance_type EMPTY_MARKER_DIST_FROM_IDEAL_BUCKET = -1;
distance_type m_dist_from_ideal_bucket;
bool m_last_bucket;
storage m_value;
};
/**
* Internal common class used by `robin_map` and `robin_set`.
*
* ValueType is what will be stored by `robin_hash` (usually `std::pair<Key, T>` for map and `Key` for set).
*
* `KeySelect` should be a `FunctionObject` which takes a `ValueType` in parameter and returns a
* reference to the key.
*
* `ValueSelect` should be a `FunctionObject` which takes a `ValueType` in parameter and returns a
* reference to the value. `ValueSelect` should be void if there is no value (in a set for example).
*
* The strong exception guarantee only holds if the expression
* `std::is_nothrow_swappable<ValueType>::value && std::is_nothrow_move_constructible<ValueType>::value` is true.
*
* Behaviour is undefined if the destructor of `ValueType` throws.
*/
template<class ValueType,
class KeySelect,
class ValueSelect,
class Hash,
class KeyEqual,
class Allocator,
bool StoreHash,
class GrowthPolicy>
class robin_hash: private Hash, private KeyEqual, private GrowthPolicy {
private:
template<typename U>
using has_mapped_type = typename std::integral_constant<bool, !std::is_same<U, void>::value>;
static_assert(noexcept(std::declval<GrowthPolicy>().bucket_for_hash(std::size_t(0))), "GrowthPolicy::bucket_for_hash must be noexcept.");
static_assert(noexcept(std::declval<GrowthPolicy>().clear()), "GrowthPolicy::clear must be noexcept.");
public:
template<bool IsConst>
class robin_iterator;
using key_type = typename KeySelect::key_type;
using value_type = ValueType;
using size_type = std::size_t;
using difference_type = std::ptrdiff_t;
using hasher = Hash;
using key_equal = KeyEqual;
using allocator_type = Allocator;
using reference = value_type&;
using const_reference = const value_type&;
using pointer = value_type*;
using const_pointer = const value_type*;
using iterator = robin_iterator<false>;
using const_iterator = robin_iterator<true>;
private:
/**
* Either store the hash because we are asked by the `StoreHash` template parameter
* or store the hash because it doesn't cost us anything in size and can be used to speed up rehash.
*/
static constexpr bool STORE_HASH = StoreHash ||
(
(sizeof(tsl::detail_robin_hash::bucket_entry<value_type, true>) ==
sizeof(tsl::detail_robin_hash::bucket_entry<value_type, false>))
&&
(sizeof(std::size_t) == sizeof(truncated_hash_type) ||
is_power_of_two_policy<GrowthPolicy>::value)
&&
// Don't store the hash for primitive types with default hash.
(!std::is_arithmetic<key_type>::value ||
!std::is_same<Hash, std::hash<key_type>>::value)
);
/**
* Only use the stored hash on lookup if we are explicitly asked. We are not sure how slow
* the KeyEqual operation is. An extra comparison may slow things down with a fast KeyEqual.
*/
static constexpr bool USE_STORED_HASH_ON_LOOKUP = StoreHash;
/**
* We can only use the hash on rehash if the size of the hash type is the same as the stored one or
* if we use a power of two modulo. In the case of the power of two modulo, we just mask
* the least significant bytes, we just have to check that the truncated_hash_type didn't truncated
* more bytes.
*/
static bool USE_STORED_HASH_ON_REHASH(size_type bucket_count) {
(void) bucket_count;
if(STORE_HASH && sizeof(std::size_t) == sizeof(truncated_hash_type)) {
return true;
}
else if(STORE_HASH && is_power_of_two_policy<GrowthPolicy>::value) {
tsl_rh_assert(bucket_count > 0);
return (bucket_count - 1) <= std::numeric_limits<truncated_hash_type>::max();
}
else {
return false;
}
}
using bucket_entry = tsl::detail_robin_hash::bucket_entry<value_type, STORE_HASH>;
using distance_type = typename bucket_entry::distance_type;
using buckets_allocator = typename std::allocator_traits<allocator_type>::template rebind_alloc<bucket_entry>;
using buckets_container_type = std::vector<bucket_entry, buckets_allocator>;
public:
/**
* The 'operator*()' and 'operator->()' methods return a const reference and const pointer respectively to the
* stored value type.
*
* In case of a map, to get a mutable reference to the value associated to a key (the '.second' in the
* stored pair), you have to call 'value()'.
*
* The main reason for this is that if we returned a `std::pair<Key, T>&` instead
* of a `const std::pair<Key, T>&`, the user may modify the key which will put the map in a undefined state.
*/
template<bool IsConst>
class robin_iterator {
friend class robin_hash;
private:
using bucket_entry_ptr = typename std::conditional<IsConst,
const bucket_entry*,
bucket_entry*>::type;
robin_iterator(bucket_entry_ptr bucket) noexcept: m_bucket(bucket) {
}
public:
using iterator_category = std::forward_iterator_tag;
using value_type = const typename robin_hash::value_type;
using difference_type = std::ptrdiff_t;
using reference = value_type&;
using pointer = value_type*;
robin_iterator() noexcept {
}
// Copy constructor from iterator to const_iterator.
template<bool TIsConst = IsConst, typename std::enable_if<TIsConst>::type* = nullptr>
robin_iterator(const robin_iterator<!TIsConst>& other) noexcept: m_bucket(other.m_bucket) {
}
robin_iterator(const robin_iterator& other) = default;
robin_iterator(robin_iterator&& other) = default;
robin_iterator& operator=(const robin_iterator& other) = default;
robin_iterator& operator=(robin_iterator&& other) = default;
const typename robin_hash::key_type& key() const {
return KeySelect()(m_bucket->value());
}
template<class U = ValueSelect, typename std::enable_if<has_mapped_type<U>::value && IsConst>::type* = nullptr>
const typename U::value_type& value() const {
return U()(m_bucket->value());
}
template<class U = ValueSelect, typename std::enable_if<has_mapped_type<U>::value && !IsConst>::type* = nullptr>
typename U::value_type& value() const {
return U()(m_bucket->value());
}
reference operator*() const {
return m_bucket->value();
}
pointer operator->() const {
return std::addressof(m_bucket->value());
}
robin_iterator& operator++() {
while(true) {
if(m_bucket->last_bucket()) {
++m_bucket;
return *this;
}
++m_bucket;
if(!m_bucket->empty()) {
return *this;
}
}
}
robin_iterator operator++(int) {
robin_iterator tmp(*this);
++*this;
return tmp;
}
friend bool operator==(const robin_iterator& lhs, const robin_iterator& rhs) {
return lhs.m_bucket == rhs.m_bucket;
}
friend bool operator!=(const robin_iterator& lhs, const robin_iterator& rhs) {
return !(lhs == rhs);
}
private:
bucket_entry_ptr m_bucket;
};
public:
#if defined(__cplusplus) && __cplusplus >= 201402L
robin_hash(size_type bucket_count,
const Hash& hash,
const KeyEqual& equal,
const Allocator& alloc,
float min_load_factor = DEFAULT_MIN_LOAD_FACTOR,
float max_load_factor = DEFAULT_MAX_LOAD_FACTOR):
Hash(hash),
KeyEqual(equal),
GrowthPolicy(bucket_count),
m_buckets_data(
[&]() {
if(bucket_count > max_bucket_count()) {
TSL_RH_THROW_OR_TERMINATE(std::length_error,
"The map exceeds its maximum bucket count.");
}
return bucket_count;
}(), alloc
),
m_buckets(m_buckets_data.empty()?static_empty_bucket_ptr():m_buckets_data.data()),
m_bucket_count(bucket_count),
m_nb_elements(0),
m_grow_on_next_insert(false),
m_try_shrink_on_next_insert(false)
{
if(m_bucket_count > 0) {
tsl_rh_assert(!m_buckets_data.empty());
m_buckets_data.back().set_as_last_bucket();
}
this->min_load_factor(min_load_factor);
this->max_load_factor(max_load_factor);
}
#else
/**
* C++11 doesn't support the creation of a std::vector with a custom allocator and 'count' default-inserted elements.
* The needed contructor `explicit vector(size_type count, const Allocator& alloc = Allocator());` is only
* available in C++14 and later. We thus must resize after using the `vector(const Allocator& alloc)` constructor.
*
* We can't use `vector(size_type count, const T& value, const Allocator& alloc)` as it requires the
* value T to be copyable.
*/
robin_hash(size_type bucket_count,
const Hash& hash,
const KeyEqual& equal,
const Allocator& alloc,
float min_load_factor = DEFAULT_MIN_LOAD_FACTOR,
float max_load_factor = DEFAULT_MAX_LOAD_FACTOR):
Hash(hash),
KeyEqual(equal),
GrowthPolicy(bucket_count),
m_buckets_data(alloc),
m_buckets(static_empty_bucket_ptr()),
m_bucket_count(bucket_count),
m_nb_elements(0),
m_grow_on_next_insert(false),
m_try_shrink_on_next_insert(false)
{
if(bucket_count > max_bucket_count()) {
TSL_RH_THROW_OR_TERMINATE(std::length_error, "The map exceeds its maximum bucket count.");
}
if(m_bucket_count > 0) {
m_buckets_data.resize(m_bucket_count);
m_buckets = m_buckets_data.data();
tsl_rh_assert(!m_buckets_data.empty());
m_buckets_data.back().set_as_last_bucket();
}
this->min_load_factor(min_load_factor);
this->max_load_factor(max_load_factor);
}
#endif
robin_hash(const robin_hash& other): Hash(other),
KeyEqual(other),
GrowthPolicy(other),
m_buckets_data(other.m_buckets_data),
m_buckets(m_buckets_data.empty()?static_empty_bucket_ptr():m_buckets_data.data()),
m_bucket_count(other.m_bucket_count),
m_nb_elements(other.m_nb_elements),
m_load_threshold(other.m_load_threshold),
m_min_load_factor(other.m_min_load_factor),
m_max_load_factor(other.m_max_load_factor),
m_grow_on_next_insert(other.m_grow_on_next_insert),
m_try_shrink_on_next_insert(other.m_try_shrink_on_next_insert)
{
}
robin_hash(robin_hash&& other) noexcept(std::is_nothrow_move_constructible<Hash>::value &&
std::is_nothrow_move_constructible<KeyEqual>::value &&
std::is_nothrow_move_constructible<GrowthPolicy>::value &&
std::is_nothrow_move_constructible<buckets_container_type>::value)
: Hash(std::move(static_cast<Hash&>(other))),
KeyEqual(std::move(static_cast<KeyEqual&>(other))),
GrowthPolicy(std::move(static_cast<GrowthPolicy&>(other))),
m_buckets_data(std::move(other.m_buckets_data)),
m_buckets(m_buckets_data.empty()?static_empty_bucket_ptr():m_buckets_data.data()),
m_bucket_count(other.m_bucket_count),
m_nb_elements(other.m_nb_elements),
m_load_threshold(other.m_load_threshold),
m_min_load_factor(other.m_min_load_factor),
m_max_load_factor(other.m_max_load_factor),
m_grow_on_next_insert(other.m_grow_on_next_insert),
m_try_shrink_on_next_insert(other.m_try_shrink_on_next_insert)
{
other.clear_and_shrink();
}
robin_hash& operator=(const robin_hash& other) {
if(&other != this) {
Hash::operator=(other);
KeyEqual::operator=(other);
GrowthPolicy::operator=(other);
m_buckets_data = other.m_buckets_data;
m_buckets = m_buckets_data.empty()?static_empty_bucket_ptr():
m_buckets_data.data();
m_bucket_count = other.m_bucket_count;
m_nb_elements = other.m_nb_elements;
m_load_threshold = other.m_load_threshold;
m_min_load_factor = other.m_min_load_factor;
m_max_load_factor = other.m_max_load_factor;
m_grow_on_next_insert = other.m_grow_on_next_insert;
m_try_shrink_on_next_insert = other.m_try_shrink_on_next_insert;
}
return *this;
}
robin_hash& operator=(robin_hash&& other) {
other.swap(*this);
other.clear();
return *this;
}
allocator_type get_allocator() const {
return m_buckets_data.get_allocator();
}
/*
* Iterators
*/
iterator begin() noexcept {
std::size_t i = 0;
while(i < m_bucket_count && m_buckets[i].empty()) {
i++;
}
return iterator(m_buckets + i);
}
const_iterator begin() const noexcept {
return cbegin();
}
const_iterator cbegin() const noexcept {
std::size_t i = 0;
while(i < m_bucket_count && m_buckets[i].empty()) {
i++;
}
return const_iterator(m_buckets + i);
}
iterator end() noexcept {
return iterator(m_buckets + m_bucket_count);
}
const_iterator end() const noexcept {
return cend();
}
const_iterator cend() const noexcept {
return const_iterator(m_buckets + m_bucket_count);
}
/*
* Capacity
*/
bool empty() const noexcept {
return m_nb_elements == 0;
}
size_type size() const noexcept {
return m_nb_elements;
}
size_type max_size() const noexcept {
return m_buckets_data.max_size();
}
/*
* Modifiers
*/
void clear() noexcept {
if(m_min_load_factor > 0.0f) {
clear_and_shrink();
}
else {
for(auto& bucket: m_buckets_data) {
bucket.clear();
}
m_nb_elements = 0;
m_grow_on_next_insert = false;
}
}
template<typename P>
std::pair<iterator, bool> insert(P&& value) {
return insert_impl(KeySelect()(value), std::forward<P>(value));
}
template<typename P>
iterator insert_hint(const_iterator hint, P&& value) {
if(hint != cend() && compare_keys(KeySelect()(*hint), KeySelect()(value))) {
return mutable_iterator(hint);
}
return insert(std::forward<P>(value)).first;
}
template<class InputIt>
void insert(InputIt first, InputIt last) {
if(std::is_base_of<std::forward_iterator_tag,
typename std::iterator_traits<InputIt>::iterator_category>::value)
{
const auto nb_elements_insert = std::distance(first, last);
const size_type nb_free_buckets = m_load_threshold - size();
tsl_rh_assert(m_load_threshold >= size());
if(nb_elements_insert > 0 && nb_free_buckets < size_type(nb_elements_insert)) {
reserve(size() + size_type(nb_elements_insert));
}
}
for(; first != last; ++first) {
insert(*first);
}
}
template<class K, class M>
std::pair<iterator, bool> insert_or_assign(K&& key, M&& obj) {
auto it = try_emplace(std::forward<K>(key), std::forward<M>(obj));
if(!it.second) {
it.first.value() = std::forward<M>(obj);
}
return it;
}
template<class K, class M>
iterator insert_or_assign(const_iterator hint, K&& key, M&& obj) {
if(hint != cend() && compare_keys(KeySelect()(*hint), key)) {
auto it = mutable_iterator(hint);
it.value() = std::forward<M>(obj);
return it;
}
return insert_or_assign(std::forward<K>(key), std::forward<M>(obj)).first;
}
template<class... Args>
std::pair<iterator, bool> emplace(Args&&... args) {
return insert(value_type(std::forward<Args>(args)...));
}
template<class... Args>
iterator emplace_hint(const_iterator hint, Args&&... args) {
return insert_hint(hint, value_type(std::forward<Args>(args)...));
}
template<class K, class... Args>
std::pair<iterator, bool> try_emplace(K&& key, Args&&... args) {
return insert_impl(key, std::piecewise_construct,
std::forward_as_tuple(std::forward<K>(key)),
std::forward_as_tuple(std::forward<Args>(args)...));
}
template<class K, class... Args>
iterator try_emplace_hint(const_iterator hint, K&& key, Args&&... args) {
if(hint != cend() && compare_keys(KeySelect()(*hint), key)) {
return mutable_iterator(hint);
}
return try_emplace(std::forward<K>(key), std::forward<Args>(args)...).first;
}
/**
* Here to avoid `template<class K> size_type erase(const K& key)` being used when
* we use an `iterator` instead of a `const_iterator`.
*/
iterator erase(iterator pos) {
erase_from_bucket(pos);
/**
* Erase bucket used a backward shift after clearing the bucket.
* Check if there is a new value in the bucket, if not get the next non-empty.
*/
if(pos.m_bucket->empty()) {
++pos;
}
m_try_shrink_on_next_insert = true;
return pos;
}
iterator erase(const_iterator pos) {
return erase(mutable_iterator(pos));
}
iterator erase(const_iterator first, const_iterator last) {
if(first == last) {
return mutable_iterator(first);
}
auto first_mutable = mutable_iterator(first);
auto last_mutable = mutable_iterator(last);
for(auto it = first_mutable.m_bucket; it != last_mutable.m_bucket; ++it) {
if(!it->empty()) {
it->clear();
m_nb_elements--;
}
}
if(last_mutable == end()) {
m_try_shrink_on_next_insert = true;
return end();
}
/*
* Backward shift on the values which come after the deleted values.
* We try to move the values closer to their ideal bucket.
*/
std::size_t icloser_bucket = static_cast<std::size_t>(first_mutable.m_bucket - m_buckets);
std::size_t ito_move_closer_value = static_cast<std::size_t>(last_mutable.m_bucket - m_buckets);
tsl_rh_assert(ito_move_closer_value > icloser_bucket);
const std::size_t ireturn_bucket = ito_move_closer_value -
std::min(ito_move_closer_value - icloser_bucket,
std::size_t(m_buckets[ito_move_closer_value].dist_from_ideal_bucket()));
while(ito_move_closer_value < m_bucket_count && m_buckets[ito_move_closer_value].dist_from_ideal_bucket() > 0) {
icloser_bucket = ito_move_closer_value -
std::min(ito_move_closer_value - icloser_bucket,
std::size_t(m_buckets[ito_move_closer_value].dist_from_ideal_bucket()));
tsl_rh_assert(m_buckets[icloser_bucket].empty());
const distance_type new_distance = distance_type(m_buckets[ito_move_closer_value].dist_from_ideal_bucket() -
(ito_move_closer_value - icloser_bucket));
m_buckets[icloser_bucket].set_value_of_empty_bucket(new_distance,
m_buckets[ito_move_closer_value].truncated_hash(),
std::move(m_buckets[ito_move_closer_value].value()));
m_buckets[ito_move_closer_value].clear();
++icloser_bucket;
++ito_move_closer_value;
}
m_try_shrink_on_next_insert = true;
return iterator(m_buckets + ireturn_bucket);
}
template<class K>
size_type erase(const K& key) {
return erase(key, hash_key(key));
}
template<class K>
size_type erase(const K& key, std::size_t hash) {
auto it = find(key, hash);
if(it != end()) {
erase_from_bucket(it);
m_try_shrink_on_next_insert = true;
return 1;
}
else {
return 0;
}
}
void swap(robin_hash& other) {
using std::swap;
swap(static_cast<Hash&>(*this), static_cast<Hash&>(other));
swap(static_cast<KeyEqual&>(*this), static_cast<KeyEqual&>(other));
swap(static_cast<GrowthPolicy&>(*this), static_cast<GrowthPolicy&>(other));
swap(m_buckets_data, other.m_buckets_data);
swap(m_buckets, other.m_buckets);
swap(m_bucket_count, other.m_bucket_count);
swap(m_nb_elements, other.m_nb_elements);
swap(m_load_threshold, other.m_load_threshold);
swap(m_min_load_factor, other.m_min_load_factor);
swap(m_max_load_factor, other.m_max_load_factor);
swap(m_grow_on_next_insert, other.m_grow_on_next_insert);
swap(m_try_shrink_on_next_insert, other.m_try_shrink_on_next_insert);
}
/*
* Lookup
*/
template<class K, class U = ValueSelect, typename std::enable_if<has_mapped_type<U>::value>::type* = nullptr>
typename U::value_type& at(const K& key) {
return at(key, hash_key(key));
}
template<class K, class U = ValueSelect, typename std::enable_if<has_mapped_type<U>::value>::type* = nullptr>
typename U::value_type& at(const K& key, std::size_t hash) {
return const_cast<typename U::value_type&>(static_cast<const robin_hash*>(this)->at(key, hash));
}
template<class K, class U = ValueSelect, typename std::enable_if<has_mapped_type<U>::value>::type* = nullptr>
const typename U::value_type& at(const K& key) const {
return at(key, hash_key(key));
}
template<class K, class U = ValueSelect, typename std::enable_if<has_mapped_type<U>::value>::type* = nullptr>
const typename U::value_type& at(const K& key, std::size_t hash) const {
auto it = find(key, hash);
if(it != cend()) {
return it.value();
}
else {
TSL_RH_THROW_OR_TERMINATE(std::out_of_range, "Couldn't find key.");
}
}
template<class K, class U = ValueSelect, typename std::enable_if<has_mapped_type<U>::value>::type* = nullptr>
typename U::value_type& operator[](K&& key) {
return try_emplace(std::forward<K>(key)).first.value();
}
template<class K>
size_type count(const K& key) const {
return count(key, hash_key(key));
}
template<class K>
size_type count(const K& key, std::size_t hash) const {
if(find(key, hash) != cend()) {
return 1;
}
else {
return 0;
}
}
template<class K>
iterator find(const K& key) {
return find_impl(key, hash_key(key));
}
template<class K>
iterator find(const K& key, std::size_t hash) {
return find_impl(key, hash);
}
template<class K>
const_iterator find(const K& key) const {
return find_impl(key, hash_key(key));
}
template<class K>
const_iterator find(const K& key, std::size_t hash) const {
return find_impl(key, hash);
}
template<class K>
bool contains(const K& key) const {
return contains(key, hash_key(key));
}
template<class K>
bool contains(const K& key, std::size_t hash) const {
return count(key, hash) != 0;
}
template<class K>
std::pair<iterator, iterator> equal_range(const K& key) {
return equal_range(key, hash_key(key));
}
template<class K>
std::pair<iterator, iterator> equal_range(const K& key, std::size_t hash) {
iterator it = find(key, hash);
return std::make_pair(it, (it == end())?it:std::next(it));
}
template<class K>
std::pair<const_iterator, const_iterator> equal_range(const K& key) const {
return equal_range(key, hash_key(key));
}
template<class K>
std::pair<const_iterator, const_iterator> equal_range(const K& key, std::size_t hash) const {
const_iterator it = find(key, hash);
return std::make_pair(it, (it == cend())?it:std::next(it));
}
/*
* Bucket interface
*/
size_type bucket_count() const {
return m_bucket_count;
}
size_type max_bucket_count() const {
return std::min(GrowthPolicy::max_bucket_count(), m_buckets_data.max_size());
}
/*
* Hash policy
*/
float load_factor() const {
if(bucket_count() == 0) {
return 0;
}
return float(m_nb_elements)/float(bucket_count());
}
float min_load_factor() const {
return m_min_load_factor;
}
float max_load_factor() const {
return m_max_load_factor;
}
void min_load_factor(float ml) {
m_min_load_factor = clamp(ml, float(MINIMUM_MIN_LOAD_FACTOR),
float(MAXIMUM_MIN_LOAD_FACTOR));
}
void max_load_factor(float ml) {
m_max_load_factor = clamp(ml, float(MINIMUM_MAX_LOAD_FACTOR),
float(MAXIMUM_MAX_LOAD_FACTOR));
m_load_threshold = size_type(float(bucket_count())*m_max_load_factor);
}
void rehash(size_type count) {
count = std::max(count, size_type(std::ceil(float(size())/max_load_factor())));
rehash_impl(count);
}
void reserve(size_type count) {
rehash(size_type(std::ceil(float(count)/max_load_factor())));
}
/*
* Observers
*/
hasher hash_function() const {
return static_cast<const Hash&>(*this);
}
key_equal key_eq() const {
return static_cast<const KeyEqual&>(*this);
}
/*
* Other
*/
iterator mutable_iterator(const_iterator pos) {
return iterator(const_cast<bucket_entry*>(pos.m_bucket));
}
private:
template<class K>
std::size_t hash_key(const K& key) const {
return Hash::operator()(key);
}
template<class K1, class K2>
bool compare_keys(const K1& key1, const K2& key2) const {
return KeyEqual::operator()(key1, key2);
}
std::size_t bucket_for_hash(std::size_t hash) const {
const std::size_t bucket = GrowthPolicy::bucket_for_hash(hash);
tsl_rh_assert(bucket < m_bucket_count || (bucket == 0 && m_bucket_count == 0));
return bucket;
}
template<class U = GrowthPolicy, typename std::enable_if<is_power_of_two_policy<U>::value>::type* = nullptr>
std::size_t next_bucket(std::size_t index) const noexcept {
tsl_rh_assert(index < bucket_count());
return (index + 1) & this->m_mask;
}
template<class U = GrowthPolicy, typename std::enable_if<!is_power_of_two_policy<U>::value>::type* = nullptr>
std::size_t next_bucket(std::size_t index) const noexcept {
tsl_rh_assert(index < bucket_count());
index++;
return (index != bucket_count())?index:0;
}
template<class K>
iterator find_impl(const K& key, std::size_t hash) {
return mutable_iterator(static_cast<const robin_hash*>(this)->find(key, hash));
}
template<class K>
const_iterator find_impl(const K& key, std::size_t hash) const {
std::size_t ibucket = bucket_for_hash(hash);
distance_type dist_from_ideal_bucket = 0;
while(dist_from_ideal_bucket <= m_buckets[ibucket].dist_from_ideal_bucket()) {
if(TSL_RH_LIKELY((!USE_STORED_HASH_ON_LOOKUP || m_buckets[ibucket].bucket_hash_equal(hash)) &&
compare_keys(KeySelect()(m_buckets[ibucket].value()), key)))
{
return const_iterator(m_buckets + ibucket);
}
ibucket = next_bucket(ibucket);
dist_from_ideal_bucket++;
}
return cend();
}
void erase_from_bucket(iterator pos) {
pos.m_bucket->clear();
m_nb_elements--;
/**
* Backward shift, swap the empty bucket, previous_ibucket, with the values on its right, ibucket,
* until we cross another empty bucket or if the other bucket has a distance_from_ideal_bucket == 0.
*
* We try to move the values closer to their ideal bucket.
*/
std::size_t previous_ibucket = static_cast<std::size_t>(pos.m_bucket - m_buckets);
std::size_t ibucket = next_bucket(previous_ibucket);
while(m_buckets[ibucket].dist_from_ideal_bucket() > 0) {
tsl_rh_assert(m_buckets[previous_ibucket].empty());
const distance_type new_distance = distance_type(m_buckets[ibucket].dist_from_ideal_bucket() - 1);
m_buckets[previous_ibucket].set_value_of_empty_bucket(new_distance, m_buckets[ibucket].truncated_hash(),
std::move(m_buckets[ibucket].value()));
m_buckets[ibucket].clear();
previous_ibucket = ibucket;
ibucket = next_bucket(ibucket);
}
}
template<class K, class... Args>
std::pair<iterator, bool> insert_impl(const K& key, Args&&... value_type_args) {
const std::size_t hash = hash_key(key);
std::size_t ibucket = bucket_for_hash(hash);
distance_type dist_from_ideal_bucket = 0;
while(dist_from_ideal_bucket <= m_buckets[ibucket].dist_from_ideal_bucket()) {
if((!USE_STORED_HASH_ON_LOOKUP || m_buckets[ibucket].bucket_hash_equal(hash)) &&
compare_keys(KeySelect()(m_buckets[ibucket].value()), key))
{
return std::make_pair(iterator(m_buckets + ibucket), false);
}
ibucket = next_bucket(ibucket);
dist_from_ideal_bucket++;
}
if(rehash_on_extreme_load()) {
ibucket = bucket_for_hash(hash);
dist_from_ideal_bucket = 0;
while(dist_from_ideal_bucket <= m_buckets[ibucket].dist_from_ideal_bucket()) {
ibucket = next_bucket(ibucket);
dist_from_ideal_bucket++;
}
}
if(m_buckets[ibucket].empty()) {
m_buckets[ibucket].set_value_of_empty_bucket(dist_from_ideal_bucket, bucket_entry::truncate_hash(hash),
std::forward<Args>(value_type_args)...);
}
else {
insert_value(ibucket, dist_from_ideal_bucket, bucket_entry::truncate_hash(hash),
std::forward<Args>(value_type_args)...);
}
m_nb_elements++;
/*
* The value will be inserted in ibucket in any case, either because it was
* empty or by stealing the bucket (robin hood).
*/
return std::make_pair(iterator(m_buckets + ibucket), true);
}
template<class... Args>
void insert_value(std::size_t ibucket, distance_type dist_from_ideal_bucket,
truncated_hash_type hash, Args&&... value_type_args)
{
value_type value(std::forward<Args>(value_type_args)...);
insert_value_impl(ibucket, dist_from_ideal_bucket, hash, value);
}
void insert_value(std::size_t ibucket, distance_type dist_from_ideal_bucket,
truncated_hash_type hash, value_type&& value)
{
insert_value_impl(ibucket, dist_from_ideal_bucket, hash, value);
}
/*
* We don't use `value_type&& value` as last argument due to a bug in MSVC when `value_type` is a pointer,
* The compiler is not able to see the difference between `std::string*` and `std::string*&&` resulting in
* a compilation error.
*
* The `value` will be in a moved state at the end of the function.
*/
void insert_value_impl(std::size_t ibucket, distance_type dist_from_ideal_bucket,
truncated_hash_type hash, value_type& value)
{
m_buckets[ibucket].swap_with_value_in_bucket(dist_from_ideal_bucket, hash, value);
ibucket = next_bucket(ibucket);
dist_from_ideal_bucket++;
while(!m_buckets[ibucket].empty()) {
if(dist_from_ideal_bucket > m_buckets[ibucket].dist_from_ideal_bucket()) {
if(dist_from_ideal_bucket >= bucket_entry::DIST_FROM_IDEAL_BUCKET_LIMIT) {
/**
* The number of probes is really high, rehash the map on the next insert.
* Difficult to do now as rehash may throw an exception.
*/
m_grow_on_next_insert = true;
}
m_buckets[ibucket].swap_with_value_in_bucket(dist_from_ideal_bucket, hash, value);
}
ibucket = next_bucket(ibucket);
dist_from_ideal_bucket++;
}
m_buckets[ibucket].set_value_of_empty_bucket(dist_from_ideal_bucket, hash, std::move(value));
}
void rehash_impl(size_type count) {
robin_hash new_table(count, static_cast<Hash&>(*this), static_cast<KeyEqual&>(*this),
get_allocator(), m_min_load_factor, m_max_load_factor);
const bool use_stored_hash = USE_STORED_HASH_ON_REHASH(new_table.bucket_count());
for(auto& bucket: m_buckets_data) {
if(bucket.empty()) {
continue;
}
const std::size_t hash = use_stored_hash?bucket.truncated_hash():
new_table.hash_key(KeySelect()(bucket.value()));
new_table.insert_value_on_rehash(new_table.bucket_for_hash(hash), 0,
bucket_entry::truncate_hash(hash), std::move(bucket.value()));
}
new_table.m_nb_elements = m_nb_elements;
new_table.swap(*this);
}
void clear_and_shrink() noexcept {
GrowthPolicy::clear();
m_buckets_data.clear();
m_buckets = static_empty_bucket_ptr();
m_bucket_count = 0;
m_nb_elements = 0;
m_load_threshold = 0;
m_grow_on_next_insert = false;
m_try_shrink_on_next_insert = false;
}
void insert_value_on_rehash(std::size_t ibucket, distance_type dist_from_ideal_bucket,
truncated_hash_type hash, value_type&& value)
{
while(true) {
if(dist_from_ideal_bucket > m_buckets[ibucket].dist_from_ideal_bucket()) {
if(m_buckets[ibucket].empty()) {
m_buckets[ibucket].set_value_of_empty_bucket(dist_from_ideal_bucket, hash, std::move(value));
return;
}
else {
m_buckets[ibucket].swap_with_value_in_bucket(dist_from_ideal_bucket, hash, value);
}
}
dist_from_ideal_bucket++;
ibucket = next_bucket(ibucket);
}
}
/**
* Grow the table if m_grow_on_next_insert is true or we reached the max_load_factor.
* Shrink the table if m_try_shrink_on_next_insert is true (an erase occurred) and
* we're below the min_load_factor.
*
* Return true if the table has been rehashed.
*/
bool rehash_on_extreme_load() {
if(m_grow_on_next_insert || size() >= m_load_threshold) {
rehash_impl(GrowthPolicy::next_bucket_count());
m_grow_on_next_insert = false;
return true;
}
if(m_try_shrink_on_next_insert) {
m_try_shrink_on_next_insert = false;
if(m_min_load_factor != 0.0f && load_factor() < m_min_load_factor) {
reserve(size() + 1);
return true;
}
}
return false;
}
public:
static const size_type DEFAULT_INIT_BUCKETS_SIZE = 0;
static constexpr float DEFAULT_MAX_LOAD_FACTOR = 0.5f;
static constexpr float MINIMUM_MAX_LOAD_FACTOR = 0.2f;
static constexpr float MAXIMUM_MAX_LOAD_FACTOR = 0.95f;
static constexpr float DEFAULT_MIN_LOAD_FACTOR = 0.0f;
static constexpr float MINIMUM_MIN_LOAD_FACTOR = 0.0f;
static constexpr float MAXIMUM_MIN_LOAD_FACTOR = 0.15f;
static_assert(MINIMUM_MAX_LOAD_FACTOR < MAXIMUM_MAX_LOAD_FACTOR,
"MINIMUM_MAX_LOAD_FACTOR should be < MAXIMUM_MAX_LOAD_FACTOR");
static_assert(MINIMUM_MIN_LOAD_FACTOR < MAXIMUM_MIN_LOAD_FACTOR,
"MINIMUM_MIN_LOAD_FACTOR should be < MAXIMUM_MIN_LOAD_FACTOR");
static_assert(MAXIMUM_MIN_LOAD_FACTOR < MINIMUM_MAX_LOAD_FACTOR,
"MAXIMUM_MIN_LOAD_FACTOR should be < MINIMUM_MAX_LOAD_FACTOR");
private:
/**
* Return an always valid pointer to an static empty bucket_entry with last_bucket() == true.
*/
bucket_entry* static_empty_bucket_ptr() noexcept {
static bucket_entry empty_bucket(true);
return &empty_bucket;
}
private:
buckets_container_type m_buckets_data;
/**
* Points to m_buckets_data.data() if !m_buckets_data.empty() otherwise points to static_empty_bucket_ptr.
* This variable is useful to avoid the cost of checking if m_buckets_data is empty when trying
* to find an element.
*
* TODO Remove m_buckets_data and only use a pointer instead of a pointer+vector to save some space in the robin_hash object.
* Manage the Allocator manually.
*/
bucket_entry* m_buckets;
/**
* Used a lot in find, avoid the call to m_buckets_data.size() which is a bit slower.
*/
size_type m_bucket_count;
size_type m_nb_elements;
size_type m_load_threshold;
float m_min_load_factor;
float m_max_load_factor;
bool m_grow_on_next_insert;
/**
* We can't shrink down the map on erase operations as the erase methods need to return the next iterator.
* Shrinking the map would invalidate all the iterators and we could not return the next iterator in a meaningful way,
* On erase, we thus just indicate on erase that we should try to shrink the hash table on the next insert
* if we go below the min_load_factor.
*/
bool m_try_shrink_on_next_insert;
};
}
}
#endif