Radish alpha
h
Radicle Heartwood Protocol & Stack
Radicle
Git (anonymous pull)
Log in to clone via SSH
Add variable bloom filter sizes
Alexis Sellier committed 3 years ago
commit 2d52a53ce5e4f141148a5f770cfd3ead2d6a45b8
parent 85ac8a1d4db5e0b4337e863485de94124858fa83
4 files changed +145 -18
modified radicle-node/src/service/filter.rs
@@ -1,38 +1,68 @@
+
#![allow(clippy::identity_op)]
use std::ops::{Deref, DerefMut};

pub use bloomy::BloomFilter;

use crate::identity::Id;

-
/// Size in bytes of subscription bloom filter.
-
pub const FILTER_SIZE: usize = 1024 * 16;
+
/// Size in bytes of *large* bloom filter.
+
/// It can store about 13'675 items with a false positive rate of 1%.
+
pub const FILTER_SIZE_L: usize = 16 * 1024;
+
/// Size in bytes of *medium* bloom filter.
+
/// It can store about 3'419 items with a false positive rate of 1%.
+
pub const FILTER_SIZE_M: usize = 4 * 1024;
+
/// Size in bytes of *small* bloom filter.
+
/// It can store about 855 items with a false positive rate of 1%.
+
pub const FILTER_SIZE_S: usize = 1 * 1024;
+

+
/// Valid filter sizes.
+
pub const FILTER_SIZES: [usize; 3] = [FILTER_SIZE_S, FILTER_SIZE_M, FILTER_SIZE_L];
+

+
/// Target false positive rate of filter.
+
pub const FILTER_FP_RATE: f64 = 0.01;
/// Number of hashes used for bloom filter.
///
/// The `test_parameters` test in this file checks that this equals
/// `bloomy::bloom::optimal_hashes` for each supported filter size at its
/// 1% false-positive capacity.
pub const FILTER_HASHES: usize = 7;

-
/// Subscription filter.
+
/// Inventory filter used for subscriptions and inventory comparison.
///
/// The [`Default`] instance has all bits set to `1`, i.e. it will match
/// everything.
///
/// This is a newtype around a [`BloomFilter`] of [`Id`]s.
-
///
-
/// Nb. This filter doesn't currently support inserting public keys.
#[derive(Clone, PartialEq, Eq, Debug)]
pub struct Filter(BloomFilter<Id>);

impl Default for Filter {
    fn default() -> Self {
        // Filling every byte with `0xff` sets every bit of the filter.
-
        Self(BloomFilter::from(vec![0xff; FILTER_SIZE]))
+
        Self(BloomFilter::from(vec![0xff; FILTER_SIZE_S]))
    }
}

impl Filter {
+
    /// Create a new filter with the given items.
+
    ///
+
    /// Uses the iterator's size hint to determine the size of the filter.
    pub fn new<'a>(ids: impl IntoIterator<Item = &'a Id>) -> Self {
-
        let mut bloom = BloomFilter::with_size(FILTER_SIZE);
+
        let iterator = ids.into_iter();
+
        let (min, _) = iterator.size_hint();
        // Only the lower bound of the size hint is used; the upper bound is ignored.
        // NOTE(review): an iterator whose lower bound under-reports (e.g. `0`)
        // gets the small filter regardless of how many items it yields --
        // confirm callers pass iterators with an exact size hint.
+
        let size = bloomy::bloom::optimal_bits(min, FILTER_FP_RATE) / 8;
        // `size` is the ideal byte count for `min` items. It is rounded up to the
        // next supported size below; anything above the medium size gets the
        // large filter.
+
        let size = if size > FILTER_SIZE_M {
+
            FILTER_SIZE_L
+
        } else if size > FILTER_SIZE_S {
+
            FILTER_SIZE_M
+
        } else {
+
            FILTER_SIZE_S
+
        };
+
        let mut bloom = BloomFilter::with_size(size);

-
        for id in ids.into_iter() {
+
        for id in iterator {
            bloom.insert(id);
        }
        Self(bloom)
    }
+

+
    /// Size in bytes.
+
    pub fn size(&self) -> usize {
        // The wrapped filter reports its length in bits; divide by 8 for bytes.
+
        self.0.bits() / 8
+
    }
}

impl Deref for Filter {
@@ -54,3 +84,65 @@ impl From<BloomFilter<Id>> for Filter {
        Self(bloom)
    }
}
+

+
#[cfg(test)]
+
mod test {
+
    use super::*;
+
    use crate::test::arbitrary;
+

+
    #[test]
+
    fn test_parameters() {
        // Pins the numbers quoted in the docs of the `FILTER_SIZE_*` constants
        // and `FILTER_HASHES` against the formulas in `bloomy::bloom`.
+
        // To store 10'000 items with a false positive rate of 1%, we need about 12KB.
+
        assert_eq!(bloomy::bloom::optimal_bits(10_000, 0.01) / 8, 11_981);
+
        // To store 1'000 items with a false positive rate of 1%, we need about 1KB.
+
        assert_eq!(bloomy::bloom::optimal_bits(1_000, 0.01) / 8, 1198);
+
        // To store 100 items with a false positive rate of 1%, we need about 120B.
+
        assert_eq!(bloomy::bloom::optimal_bits(100, 0.01) / 8, 119);
+

+
        // With 16KB, we can store 13'675 items with a false positive rate of 1%.
+
        assert_eq!(
+
            bloomy::bloom::optimal_capacity(FILTER_SIZE_L * 8, FILTER_FP_RATE),
+
            13_675
+
        );
+
        // With 4KB, we can store 3'419 items with a false positive rate of 1%.
+
        assert_eq!(
+
            bloomy::bloom::optimal_capacity(FILTER_SIZE_M * 8, FILTER_FP_RATE),
+
            3419
+
        );
+
        // With 1KB, we can store 855 items with a false positive rate of 1%.
+
        assert_eq!(
+
            bloomy::bloom::optimal_capacity(FILTER_SIZE_S * 8, FILTER_FP_RATE),
+
            855
+
        );
        // The hash count below is checked for each size at the capacity computed above.
+

+
        assert_eq!(
+
            bloomy::bloom::optimal_hashes(FILTER_SIZE_L * 8, 13_675),
+
            FILTER_HASHES
+
        );
+
        assert_eq!(
+
            bloomy::bloom::optimal_hashes(FILTER_SIZE_M * 8, 3419),
+
            FILTER_HASHES
+
        );
+
        assert_eq!(
+
            bloomy::bloom::optimal_hashes(FILTER_SIZE_S * 8, 855),
+
            FILTER_HASHES
+
        );
+
    }
+

+
    #[test]
+
    fn test_sizes() {
+
        let ids = arbitrary::vec::<Id>(3420);
        // 3'420 is one more item than the 3'419 a medium filter is sized for
        // (see `test_parameters`).
+
        let f = Filter::new(ids.iter().take(10));
        // 10 items need far less than 1 KiB, so the small size is expected.
+
        assert_eq!(f.size(), FILTER_SIZE_S);
+

+
        let f = Filter::new(ids.iter().take(1000));
        // 1'000 items need 1'198 bytes (see `test_parameters`), which exceeds
        // the small size, so the medium size is expected.
+
        assert_eq!(f.size(), FILTER_SIZE_M);
+

+
        let f = Filter::new(ids.iter());
        // All 3'420 items are expected to exceed the medium size, hence large.
+
        assert_eq!(f.size(), FILTER_SIZE_L);
+

+
        // Just checking that iterators over hash sets give correct size hints.
+
        let hs = arbitrary::set::<Id>(42..=42);
+
        assert_eq!(hs.iter().size_hint(), (42, Some(42)));
+
    }
+
}
modified radicle-node/src/test/arbitrary.rs
@@ -5,7 +5,7 @@ use quickcheck::Arbitrary;

use crate::crypto;
use crate::prelude::{Id, NodeId, Refs, Timestamp};
-
use crate::service::filter::{Filter, FILTER_SIZE};
+
use crate::service::filter::{Filter, FILTER_SIZE_L, FILTER_SIZE_M, FILTER_SIZE_S};
use crate::service::message::{
    Address, Announcement, Envelope, InventoryAnnouncement, Message, NodeAnnouncement,
    RefsAnnouncement, Subscribe,
@@ -16,7 +16,10 @@ pub use radicle::test::arbitrary::*;

impl Arbitrary for Filter {
    fn arbitrary(g: &mut quickcheck::Gen) -> Self {
-
        let mut bytes = vec![0; FILTER_SIZE];
+
        let size = *g
+
            .choose(&[FILTER_SIZE_S, FILTER_SIZE_M, FILTER_SIZE_L])
+
            .unwrap();
+
        let mut bytes = vec![0; size];
        for _ in 0..64 {
            let index = usize::arbitrary(g) % bytes.len();
            bytes[index] = u8::arbitrary(g);
modified radicle-node/src/wire.rs
@@ -403,16 +403,18 @@ impl Encode for filter::Filter {

impl Decode for filter::Filter {
    fn decode<R: std::io::Read + ?Sized>(reader: &mut R) -> Result<Self, Error> {
        // Layout as read here: a `Size` length prefix, then that many raw filter bytes.
-
        let size: Size = Decode::decode(reader)?;
-
        if size as usize != filter::FILTER_SIZE {
-
            return Err(Error::InvalidFilterSize(size as usize));
+
        let size: usize = Size::decode(reader)? as usize;
        // Any length outside `filter::FILTER_SIZES` is rejected before the
        // buffer is allocated.
+
        if !filter::FILTER_SIZES.contains(&size) {
+
            return Err(Error::InvalidFilterSize(size));
        }
-
        let bytes: [u8; filter::FILTER_SIZE] = Decode::decode(reader)?;
-
        let bf = filter::BloomFilter::from(Vec::from(bytes));

-
        debug_assert_eq!(bf.hashes(), filter::FILTER_HASHES);
+
        let mut bytes = vec![0; size];
        // `read_exact` fills the whole buffer or returns an error, which `?` propagates.
+
        reader.read_exact(&mut bytes[..])?;

-
        Ok(Self::from(bf))
+
        let f = filter::BloomFilter::from(bytes);
        // Debug builds only: the rebuilt filter must report the expected hash count.
+
        debug_assert_eq!(f.hashes(), filter::FILTER_HASHES);
+

+
        Ok(Self::from(f))
    }
}

@@ -548,7 +550,7 @@ mod tests {

    use crate::crypto::Unverified;
    use crate::storage::refs::SignedRefs;
-
    use crate::test::arbitrary;
+
    use crate::test::{arbitrary, assert_matches};

    #[quickcheck]
    fn prop_u8(input: u8) {
@@ -604,6 +606,14 @@ mod tests {
    }

    #[quickcheck]
+
    fn prop_filter(input: filter::Filter) {
        // Round-trip property: decoding an encoded filter must yield an equal filter.
+
        assert_eq!(
+
            deserialize::<filter::Filter>(&serialize(&input)).unwrap(),
+
            input
+
        );
+
    }
+

+
    #[quickcheck]
    fn prop_id(input: Id) {
        // Round-trip property: an encoded `Id` must decode back to the same value.
        let encoded = serialize(&input);
        let decoded = deserialize::<Id>(&encoded).unwrap();

        assert_eq!(decoded, input);
    }
@@ -662,4 +672,16 @@ mod tests {
        };
        assert_eq!(deserialize::<git::Url>(&serialize(&url)).unwrap(), url);
    }
+

+
    #[test]
+
    fn test_filter_invalid() {
        // `FILTER_SIZE_M / 3` is not one of the sizes listed in `filter::FILTER_SIZES`.
+
        let b = bloomy::BloomFilter::with_size(filter::FILTER_SIZE_M / 3);
+
        let f = filter::Filter::from(b);
+
        let bytes = serialize(&f);
        // Presumably the encoder writes the filter's byte length as the size
        // prefix (encoder not shown here); decoding must then reject it.
+

+
        assert_matches!(
+
            deserialize::<filter::Filter>(&bytes).unwrap_err(),
+
            Error::InvalidFilterSize(_)
+
        );
+
    }
}
modified radicle/src/test/arbitrary.rs
@@ -28,6 +28,16 @@ pub fn set<T: Eq + Hash + Arbitrary>(range: impl RangeBounds<usize>) -> HashSet<
    set
}

+
pub fn vec<T: Eq + Arbitrary>(size: usize) -> Vec<T> {
+
    let mut vec = Vec::with_capacity(size);
+
    let mut g = quickcheck::Gen::new(size);
+

+
    for _ in 0..vec.capacity() {
+
        vec.push(T::arbitrary(&mut g));
+
    }
+
    vec
+
}
+

pub fn gen<T: Arbitrary>(size: usize) -> T {
    let mut gen = quickcheck::Gen::new(size);