Skip to content

[DRAFT] Use the one flatbuffer to store all lists #489

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 7 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
192 changes: 85 additions & 107 deletions src/blocker.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,12 @@ use serde::Serialize;
use std::collections::HashSet;
use std::ops::DerefMut;

use crate::filters::network::{NetworkFilter, NetworkFilterMaskHelper};
use crate::filters::fb_network::SharedStateRef;
use crate::filters::network::NetworkFilterMaskHelper;
use crate::network_filter_list::NetworkFilterList;
use crate::regex_manager::{RegexManager, RegexManagerDiscardPolicy};
use crate::request::Request;
use crate::resources::ResourceStorage;
use crate::utils::Hash;

/// Options used when constructing a [`Blocker`].
pub struct BlockerOptions {
Expand Down Expand Up @@ -64,28 +64,31 @@ pub struct BlockerResult {
// pass empty set for the rest
static NO_TAGS: Lazy<HashSet<String>> = Lazy::new(HashSet::new);

// TODO: move to a proper place
/// Identifies one of the network filter lists kept in the shared flatbuffer.
///
/// The discriminant is cast with `as usize` and used as the index into the
/// flatbuffer's `lists()` vector (see `Blocker::get_list`), so the numeric
/// values are part of the storage layout and must not be reordered.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum FilterId {
    /// `$csp=` filters.
    Csp = 0,
    /// `@@` exception filters.
    Exceptions = 1,
    /// `$important` filters.
    Importants = 2,
    /// `$redirect` / `$redirect-rule` filters.
    Redirects = 3,
    /// `$removeparam` filters.
    RemoveParam = 4,
    /// All other (plain blocking) filters.
    Filters = 5,
    /// `$generichide` filters.
    GenericHide = 6,
    /// `$tag=` filters, whether or not their tag is currently enabled.
    TaggedFiltersAll = 7,
    /// Not a list: equals the number of list variants above.
    /// NOTE(review): presumably the expected length of `lists()` — confirm
    /// against the flatbuffer builder.
    Size = 8,
}

/// Stores network filters for efficient querying.
pub struct Blocker {
pub(crate) csp: NetworkFilterList,
pub(crate) exceptions: NetworkFilterList,
pub(crate) importants: NetworkFilterList,
pub(crate) redirects: NetworkFilterList,
pub(crate) removeparam: NetworkFilterList,
pub(crate) filters: NetworkFilterList,
pub(crate) generic_hide: NetworkFilterList,

// Enabled tags are not serialized - when deserializing, tags of the existing
// instance (the one we are recreating lists into) are maintained
pub(crate) tags_enabled: HashSet<String>,
pub(crate) tagged_filters_all: NetworkFilterList,

pub(crate) enable_optimizations: bool,

// Not serialized
#[cfg(feature = "unsync-regex-caching")]
pub(crate) regex_manager: std::cell::RefCell<RegexManager>,
#[cfg(not(feature = "unsync-regex-caching"))]
pub(crate) regex_manager: std::sync::Mutex<RegexManager>,

pub(crate) shared_state: SharedStateRef,
}

impl Blocker {
Expand All @@ -95,6 +98,46 @@ impl Blocker {
self.check_parameterised(request, resources, false, false)
}

/// Builds a [`NetworkFilterList`] view over the list stored at index `id`
/// inside the shared flatbuffer.
///
/// The returned value borrows `self.shared_state`; nothing is copied here
/// beyond what `lists().get(..)` itself returns.
pub(crate) fn get_list(&self, id: FilterId) -> NetworkFilterList {
    // TODO: verify lists() size and id is in range
    // NOTE(review): no range check is done here; what happens for an
    // out-of-range index depends on `lists().get` — confirm.
    let state = &self.shared_state;
    let index = id as usize;
    let list = state.memory.root().lists().get(index);
    NetworkFilterList {
        list,
        shared_state: state,
    }
}

/// Returns the filter list stored under [`FilterId::Csp`], via `get_list`.
pub(crate) fn csp(&self) -> NetworkFilterList {
self.get_list(FilterId::Csp)
}

/// Returns the filter list stored under [`FilterId::Exceptions`], via `get_list`.
pub(crate) fn exceptions(&self) -> NetworkFilterList {
self.get_list(FilterId::Exceptions)
}

/// Returns the filter list stored under [`FilterId::Importants`], via `get_list`.
pub(crate) fn importants(&self) -> NetworkFilterList {
self.get_list(FilterId::Importants)
}

/// Returns the filter list stored under [`FilterId::Redirects`], via `get_list`.
pub(crate) fn redirects(&self) -> NetworkFilterList {
self.get_list(FilterId::Redirects)
}

/// Returns the filter list stored under [`FilterId::RemoveParam`], via `get_list`.
pub(crate) fn removeparam(&self) -> NetworkFilterList {
self.get_list(FilterId::RemoveParam)
}

/// Returns the filter list stored under [`FilterId::Filters`], via `get_list`.
pub(crate) fn filters(&self) -> NetworkFilterList {
self.get_list(FilterId::Filters)
}

/// Returns the filter list stored under [`FilterId::GenericHide`], via `get_list`.
pub(crate) fn generic_hide(&self) -> NetworkFilterList {
self.get_list(FilterId::GenericHide)
}

/// Returns the filter list stored under [`FilterId::TaggedFiltersAll`], via `get_list`.
pub(crate) fn tagged_filters_all(&self) -> NetworkFilterList {
self.get_list(FilterId::TaggedFiltersAll)
}

#[cfg(feature = "unsync-regex-caching")]
fn borrow_regex_manager(&self) -> std::cell::RefMut<RegexManager> {
#[allow(unused_mut)]
Expand All @@ -115,7 +158,7 @@ impl Blocker {

pub fn check_generic_hide(&self, hostname_request: &Request) -> bool {
let mut regex_manager = self.borrow_regex_manager();
self.generic_hide
self.generic_hide()
.check(hostname_request, &HashSet::new(), &mut regex_manager)
.is_some()
}
Expand All @@ -139,33 +182,35 @@ impl Blocker {
// 4. exceptions - if any non-important match or forced

// Always check important filters
let important_filter = self.importants.check(request, &NO_TAGS, &mut regex_manager);
let important_filter = self
.importants()
.check(request, &NO_TAGS, &mut regex_manager);

// only check the rest of the rules if not previously matched
let filter = if important_filter.is_none() && !matched_rule {
self.tagged_filters_all
self.tagged_filters_all()
.check(request, &self.tags_enabled, &mut regex_manager)
.or_else(|| self.filters.check(request, &NO_TAGS, &mut regex_manager))
.or_else(|| self.filters().check(request, &NO_TAGS, &mut regex_manager))
} else {
important_filter
};

let exception = match filter.as_ref() {
// if no other rule matches, only check exceptions if forced to
None if matched_rule || force_check_exceptions => {
self.exceptions
self.exceptions()
.check(request, &self.tags_enabled, &mut regex_manager)
}
None => None,
// If matched an important filter, exceptions don't matter
Some(f) if f.is_important() => None,
Some(_) => self
.exceptions
.exceptions()
.check(request, &self.tags_enabled, &mut regex_manager),
};

let redirect_filters =
self.redirects
self.redirects()
.check_all(request, &NO_TAGS, regex_manager.deref_mut());

// Extract the highest priority redirect directive.
Expand Down Expand Up @@ -231,7 +276,7 @@ impl Blocker {
let rewritten_url = if important {
None
} else {
Self::apply_removeparam(&self.removeparam, request, regex_manager.deref_mut())
Self::apply_removeparam(&self.removeparam(), request, regex_manager.deref_mut())
};

// If something has already matched before but we don't know what, still return a match
Expand Down Expand Up @@ -346,7 +391,7 @@ impl Blocker {

let mut regex_manager = self.borrow_regex_manager();
let filters = self
.csp
.csp()
.check_all(request, &self.tags_enabled, &mut regex_manager);

if filters.is_empty() {
Expand Down Expand Up @@ -390,96 +435,29 @@ impl Blocker {
Some(merged)
}

pub fn new(network_filters: Vec<NetworkFilter>, options: &BlockerOptions) -> Self {
// Capacity of filter subsets estimated based on counts in EasyList and EasyPrivacy - if necessary
// the Vectors will grow beyond the pre-set capacity, but it is more efficient to allocate all at once
// $csp=
let mut csp = Vec::with_capacity(200);
// @@filter
let mut exceptions = Vec::with_capacity(network_filters.len() / 8);
// $important
let mut importants = Vec::with_capacity(200);
// $redirect, $redirect-rule
let mut redirects = Vec::with_capacity(200);
// $removeparam
let mut removeparam = Vec::with_capacity(60);
// $tag=
let mut tagged_filters_all = Vec::with_capacity(200);
// $badfilter
let mut badfilters = Vec::with_capacity(100);
// $generichide
let mut generic_hide = Vec::with_capacity(4000);
// All other filters
let mut filters = Vec::with_capacity(network_filters.len());

// Injections
// TODO: resource handling

if !network_filters.is_empty() {
for filter in network_filters.iter() {
if filter.is_badfilter() {
badfilters.push(filter);
}
}
let badfilter_ids: HashSet<Hash> = badfilters
.iter()
.map(|f| f.get_id_without_badfilter())
.collect();
for filter in network_filters {
// skip any bad filters
let filter_id = filter.get_id();
if badfilter_ids.contains(&filter_id) || filter.is_badfilter() {
continue;
}

// Redirects are independent of blocking behavior.
if filter.is_redirect() {
redirects.push(filter.clone());
}

if filter.is_csp() {
csp.push(filter);
} else if filter.is_removeparam() {
removeparam.push(filter);
} else if filter.is_generic_hide() {
generic_hide.push(filter);
} else if filter.is_exception() {
exceptions.push(filter);
} else if filter.is_important() {
importants.push(filter);
} else if filter.tag.is_some() && !filter.is_redirect() {
// `tag` + `redirect` is unsupported for now.
tagged_filters_all.push(filter);
} else if (filter.is_redirect() && filter.also_block_redirect())
|| !filter.is_redirect()
{
filters.push(filter);
}
}
}

pub(crate) fn from_shared_state(shared_state: SharedStateRef) -> Self {
Self {
csp: NetworkFilterList::new(csp, options.enable_optimizations),
exceptions: NetworkFilterList::new(exceptions, options.enable_optimizations),
importants: NetworkFilterList::new(importants, options.enable_optimizations),
redirects: NetworkFilterList::new(redirects, options.enable_optimizations),
// Don't optimize removeparam, since it can fuse filters without respecting distinct
// queryparam values
removeparam: NetworkFilterList::new(removeparam, false),
filters: NetworkFilterList::new(filters, options.enable_optimizations),
generic_hide: NetworkFilterList::new(generic_hide, options.enable_optimizations),
// Tags special case for enabling/disabling them dynamically
shared_state,
tags_enabled: HashSet::new(),
tagged_filters_all: NetworkFilterList::new(
tagged_filters_all,
options.enable_optimizations,
),
// Options
enable_optimizations: options.enable_optimizations,
regex_manager: Default::default(),
}
}

// TODO: only for tests and benchmarks
/// Builds a `Blocker` directly from parsed network filters.
///
/// The filters are packed into a flatbuffer by
/// `FlatBufferBuilder::make_flatbuffer` (honouring
/// `options.enable_optimizations`), wrapped in a `SharedState`, and handed
/// to [`Blocker::from_shared_state`].
#[allow(dead_code)]
pub(crate) fn new(
    network_filters: Vec<crate::filters::network::NetworkFilter>,
    options: &BlockerOptions,
) -> Self {
    let flatbuffer = crate::filters::flat_builder::FlatBufferBuilder::make_flatbuffer(
        network_filters,
        options.enable_optimizations,
    );
    Self::from_shared_state(crate::filters::fb_network::SharedState::new(flatbuffer))
}

pub fn use_tags(&mut self, tags: &[&str]) {
let tag_set: HashSet<String> = tags.iter().map(|&t| String::from(t)).collect();
self.tags_with_set(tag_set);
Expand Down
8 changes: 4 additions & 4 deletions src/data_format/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@ mod storage;

pub(crate) mod utils;

use crate::blocker::Blocker;
use crate::cosmetic_filter_cache::CosmeticFilterCache;
use crate::filters::unsafe_tools::VerifiedFlatbufferMemory;
use crate::network_filter_list::NetworkFilterListParsingError;

/// Newer formats start with this magic byte sequence.
Expand Down Expand Up @@ -62,16 +62,16 @@ impl From<NetworkFilterListParsingError> for DeserializationError {
}

pub(crate) fn serialize_engine(
blocker: &Blocker,
flatbuffer_memory: &VerifiedFlatbufferMemory,
cfc: &CosmeticFilterCache,
) -> Result<Vec<u8>, SerializationError> {
let serialize_format = storage::SerializeFormat::from((blocker, cfc));
let serialize_format = storage::SerializeFormat::from((flatbuffer_memory, cfc));
serialize_format.serialize()
}

pub(crate) fn deserialize_engine(
serialized: &[u8],
) -> Result<(Blocker, CosmeticFilterCache), DeserializationError> {
) -> Result<(VerifiedFlatbufferMemory, CosmeticFilterCache), DeserializationError> {
let deserialize_format = storage::DeserializeFormat::deserialize(serialized)?;
deserialize_format.try_into()
}
Expand Down
Loading
Loading