Skip to content

Decouple serialization/deserialization logic from internal data representation #102

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jun 1, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 19 additions & 27 deletions src/blocker.rs
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ pub enum BlockerError {
}

#[cfg(feature = "object-pooling")]
struct TokenPool {
pub struct TokenPool {
pub pool: Pool<Vec<utils::Hash>>
}

Expand All @@ -70,37 +70,31 @@ impl Default for TokenPool {
}
}

#[derive(Serialize, Deserialize)]
pub struct Blocker {
csp: NetworkFilterList,
exceptions: NetworkFilterList,
importants: NetworkFilterList,
redirects: NetworkFilterList,
filters_tagged: NetworkFilterList,
filters: NetworkFilterList,
pub(crate) csp: NetworkFilterList,
pub(crate) exceptions: NetworkFilterList,
pub(crate) importants: NetworkFilterList,
pub(crate) redirects: NetworkFilterList,
pub(crate) filters_tagged: NetworkFilterList,
pub(crate) filters: NetworkFilterList,

// Do not serialize enabled tags - when deserializing, tags of the existing
// Enabled tags are not serialized - when deserializing, tags of the existing
// instance (the one we are recreating lists into) are maintained
#[serde(skip_serializing, skip_deserializing)]
tags_enabled: HashSet<String>,
tagged_filters_all: Vec<NetworkFilter>,
pub(crate) tags_enabled: HashSet<String>,
pub(crate) tagged_filters_all: Vec<NetworkFilter>,

#[serde(skip_serializing, skip_deserializing)]
hot_filters: NetworkFilterList,
// Not serialized
pub(crate) hot_filters: NetworkFilterList,

debug: bool,
enable_optimizations: bool,
_unused: bool, // This field exists for backwards compatibility only.
_unused2: bool, // This field exists for backwards compatibility only, and *must* be true.
pub(crate) debug: bool,
pub(crate) enable_optimizations: bool,

#[serde(default)]
resources: RedirectResourceStorage,
pub(crate) resources: RedirectResourceStorage,
// Not serialized
#[cfg(feature = "object-pooling")]
#[serde(skip_serializing, skip_deserializing)]
pool: TokenPool,
pub(crate) pool: TokenPool,

#[serde(default)]
generic_hide: NetworkFilterList,
pub(crate) generic_hide: NetworkFilterList,
}

impl Blocker {
Expand Down Expand Up @@ -327,8 +321,6 @@ impl Blocker {
// Options
debug: options.debug,
enable_optimizations: options.enable_optimizations,
_unused: true,
_unused2: true,

resources: RedirectResourceStorage::default(),
#[cfg(feature = "object-pooling")]
Expand Down Expand Up @@ -436,7 +428,7 @@ impl Blocker {
}

#[derive(Serialize, Deserialize, Default)]
struct NetworkFilterList {
pub struct NetworkFilterList {
filter_map: HashMap<Hash, Vec<Arc<NetworkFilter>>>,
// optimized: Option<bool>
}
Expand Down
18 changes: 9 additions & 9 deletions src/cosmetic_filter_cache.rs
Original file line number Diff line number Diff line change
Expand Up @@ -84,17 +84,17 @@ fn hostname_specific_rules(rules: &[&SpecificFilterType]) -> (HashSet<String>, H
}

#[derive(Deserialize, Serialize)]
pub struct CosmeticFilterCache {
simple_class_rules: HashSet<String>,
simple_id_rules: HashSet<String>,
complex_class_rules: HashMap<String, Vec<String>>,
complex_id_rules: HashMap<String, Vec<String>>,
pub(crate) struct CosmeticFilterCache {
pub(crate) simple_class_rules: HashSet<String>,
pub(crate) simple_id_rules: HashSet<String>,
pub(crate) complex_class_rules: HashMap<String, Vec<String>>,
pub(crate) complex_id_rules: HashMap<String, Vec<String>>,

specific_rules: HostnameRuleDb,
pub(crate) specific_rules: HostnameRuleDb,

misc_generic_selectors: HashSet<String>,
pub(crate) misc_generic_selectors: HashSet<String>,

scriptlets: ScriptletResourceStorage,
pub(crate) scriptlets: ScriptletResourceStorage,
}

impl CosmeticFilterCache {
Expand Down Expand Up @@ -311,7 +311,7 @@ impl HostnameExceptionsBuilder {
}
}

#[derive(Deserialize, Serialize)]
#[derive(Deserialize, Serialize, Default)]
pub struct HostnameRuleDb {
db: HashMap<Hash, Vec<SpecificFilterType>>,
}
Expand Down
250 changes: 250 additions & 0 deletions src/data_format.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,250 @@
//! Contains representations of data from the adblocking engine in a
//! forwards-and-backwards-compatible format, as well as utilities for converting these to and from
//! the actual `Engine` components.
//!
//! The format itself is split into two parts for historical reasons. Any new fields should be
//! added to the _end_ of both `SerializeFormatRest` and `DeserializeFormatRest`.

use std::collections::{HashSet, HashMap};
use serde::{Deserialize, Serialize};
use flate2::write::GzEncoder;
use flate2::read::GzDecoder;
use flate2::Compression;
use rmps;

use crate::blocker::{Blocker, NetworkFilterList};
use crate::resources::{RedirectResourceStorage, ScriptletResourceStorage};
use crate::filters::network::NetworkFilter;
use crate::cosmetic_filter_cache::{CosmeticFilterCache, HostnameRuleDb};
use crate::utils::is_eof_error;

/// Provides structural aggregation of referenced adblock engine data to allow for allocation-free
/// serialization.
///
/// Note that this does not implement `Serialize` directly, as it is composed of two parts which
/// must be serialized independently. Instead, use the `serialize` method.
pub struct SerializeFormat<'a> {
// First part of the format; `serialize` writes it to the output before `rest`.
part1: SerializeFormatPt1<'a>,
// Second part of the format; `serialize` writes it directly after `part1`.
rest: SerializeFormatRest<'a>,
}

/// Errors that can be returned by `SerializeFormat::serialize`.
///
/// Implements `Display` and `std::error::Error` so that it can be printed, boxed as
/// `Box<dyn std::error::Error>`, and chained to its underlying cause through `source()`.
#[derive(Debug)]
pub enum SerializationError {
    /// Writing one of the format's parts with `rmps::encode::write` failed.
    RmpSerdeError(rmps::encode::Error),
    /// An I/O error was reported, e.g. while finishing the gzip stream.
    GzError(std::io::Error),
}

impl std::fmt::Display for SerializationError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::RmpSerdeError(e) => write!(f, "failed to encode engine data: {}", e),
            Self::GzError(e) => write!(f, "failed to compress engine data: {}", e),
        }
    }
}

impl std::error::Error for SerializationError {
    /// Returns the wrapped encoder or I/O error that caused this failure.
    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
        match self {
            Self::RmpSerdeError(e) => Some(e),
            Self::GzError(e) => Some(e),
        }
    }
}

impl From<rmps::encode::Error> for SerializationError {
fn from(e: rmps::encode::Error) -> Self { Self::RmpSerdeError(e) }
}

impl From<std::io::Error> for SerializationError {
fn from(e: std::io::Error) -> Self { Self::GzError(e) }
}

impl<'a> SerializeFormat<'a> {
pub fn serialize(&self) -> Result<Vec<u8>, SerializationError> {
let mut gz = GzEncoder::new(Vec::new(), Compression::default());
rmps::encode::write(&mut gz, &self.part1)?;
rmps::encode::write(&mut gz, &self.rest)?;
let compressed = gz.finish()?;
Ok(compressed)
}
}

/// First independently-serialized part of the format.
///
/// Apart from the two `_unused*` placeholders, every field is borrowed or copied from a `Blocker`
/// by the `From<(&Blocker, &CosmeticFilterCache)>` conversion for `SerializeFormat`.
///
/// NOTE(review): the field order presumably determines the encoded layout - confirm against the
/// `rmps` struct encoding before reordering, adding or removing fields here.
#[derive(Serialize)]
struct SerializeFormatPt1<'a> {
csp: &'a NetworkFilterList,
exceptions: &'a NetworkFilterList,
importants: &'a NetworkFilterList,
redirects: &'a NetworkFilterList,
filters_tagged: &'a NetworkFilterList,
filters: &'a NetworkFilterList,

tagged_filters_all: &'a Vec<NetworkFilter>,

debug: bool,
enable_optimizations: bool,

// This field exists for backwards compatibility only.
// The conversion from `Blocker` always writes `true` here.
_unused: bool,
// This field exists for backwards compatibility only, and *must* be true.
_unused2: bool,

resources: &'a RedirectResourceStorage,
}

/// Second independently-serialized part of the format.
///
/// All fields are borrowed from a `CosmeticFilterCache`, except `generic_hide`, which is borrowed
/// from the `Blocker`.
///
/// Any new fields should be added to the _end_ of this struct and of `DeserializeFormatRest`.
#[derive(Serialize)]
struct SerializeFormatRest<'a> {
simple_class_rules: &'a HashSet<String>,
simple_id_rules: &'a HashSet<String>,
complex_class_rules: &'a HashMap<String, Vec<String>>,
complex_id_rules: &'a HashMap<String, Vec<String>>,

specific_rules: &'a HostnameRuleDb,

misc_generic_selectors: &'a HashSet<String>,

scriptlets: &'a ScriptletResourceStorage,

// Taken from the `Blocker`, not from the `CosmeticFilterCache` like the fields above.
generic_hide: &'a NetworkFilterList,
}

/// Structural representation of adblock engine data that can be built up from deserialization and
/// used directly to construct new `Engine` components without unnecessary allocation.
///
/// Note that this does not implement `Deserialize` directly, as it is composed of two parts which
/// must be deserialized independently. Instead, use the `deserialize` method.
pub struct DeserializeFormat {
// First part of the format; `deserialize` always requires it to decode successfully.
part1: DeserializeFormatPart1,
// Second part of the format; `deserialize` falls back to its `Default` value when decoding it
// fails with an error accepted by `is_eof_error`.
rest: DeserializeFormatRest,
}

/// Errors that can be returned by `DeserializeFormat::deserialize`.
///
/// Implements `Display` and `std::error::Error` so that it can be printed, boxed as
/// `Box<dyn std::error::Error>`, and chained to its underlying cause through `source()`.
#[derive(Debug)]
pub enum DeserializationError {
    /// Reading one of the format's parts with `rmps::decode::from_read` failed.
    RmpSerdeError(rmps::decode::Error),
}

impl std::fmt::Display for DeserializationError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::RmpSerdeError(e) => write!(f, "failed to decode engine data: {}", e),
        }
    }
}

impl std::error::Error for DeserializationError {
    /// Returns the wrapped decoder error that caused this failure.
    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
        match self {
            Self::RmpSerdeError(e) => Some(e),
        }
    }
}

impl From<rmps::decode::Error> for DeserializationError {
fn from(e: rmps::decode::Error) -> Self { Self::RmpSerdeError(e) }
}

impl DeserializeFormat {
pub fn deserialize(serialized: &[u8]) -> Result<Self, DeserializationError> {
let mut gz = GzDecoder::new(serialized);
let part1: DeserializeFormatPart1 = rmps::decode::from_read(&mut gz)?;
let rest = match rmps::decode::from_read(&mut gz) {
Ok(rest) => rest,
Err(ref e) if is_eof_error(e) => Default::default(),
Err(e) => return Err(DeserializationError::RmpSerdeError(e)),
};
Ok(Self { part1, rest })
}
}

/// Owned counterpart of `SerializeFormatPt1`: the first independently-deserialized part of the
/// format.
///
/// Apart from the two `_unused*` placeholders, every field is moved into a `Blocker` when a
/// `DeserializeFormat` is converted into `(Blocker, CosmeticFilterCache)`.
///
/// NOTE(review): the field order presumably has to match `SerializeFormatPt1` - confirm against
/// the `rmps` struct encoding before reordering, adding or removing fields here.
#[derive(Deserialize)]
struct DeserializeFormatPart1 {
csp: NetworkFilterList,
exceptions: NetworkFilterList,
importants: NetworkFilterList,
redirects: NetworkFilterList,
filters_tagged: NetworkFilterList,
filters: NetworkFilterList,

tagged_filters_all: Vec<NetworkFilter>,

debug: bool,
enable_optimizations: bool,

// This field exists for backwards compatibility only.
// It is decoded but not read when the `Blocker` is constructed.
_unused: bool,
// This field exists for backwards compatibility only, and *must* be true.
_unused2: bool,

// Takes its `Default` value when the field is missing from the input.
#[serde(default)]
resources: RedirectResourceStorage,
}

/// Owned counterpart of `SerializeFormatRest`: the second independently-deserialized part of the
/// format.
///
/// Any fields added to this must include the `#[serde(default)]` annotation, or another serde
/// annotation that will allow the format to gracefully handle missing fields when deserializing
/// from older versions of the format.
///
/// New fields should be added to the _end_ of this struct and of `SerializeFormatRest`.
///
/// The derived `Default` is what `DeserializeFormat::deserialize` uses when decoding this part
/// fails with an error accepted by `is_eof_error`.
#[derive(Deserialize, Default)]
struct DeserializeFormatRest {
#[serde(default)]
simple_class_rules: HashSet<String>,
#[serde(default)]
simple_id_rules: HashSet<String>,
#[serde(default)]
complex_class_rules: HashMap<String, Vec<String>>,
#[serde(default)]
complex_id_rules: HashMap<String, Vec<String>>,

#[serde(default)]
specific_rules: HostnameRuleDb,

#[serde(default)]
misc_generic_selectors: HashSet<String>,

#[serde(default)]
scriptlets: ScriptletResourceStorage,

// Moved into the `Blocker`, not into the `CosmeticFilterCache` like the fields above.
#[serde(default)]
generic_hide: NetworkFilterList,
}

impl<'a> From<(&'a Blocker, &'a CosmeticFilterCache)> for SerializeFormat<'a> {
fn from(v: (&'a Blocker, &'a CosmeticFilterCache)) -> Self {
let (blocker, cfc) = v;
Self {
part1: SerializeFormatPt1 {
csp: &blocker.csp,
exceptions: &blocker.exceptions,
importants: &blocker.importants,
redirects: &blocker.redirects,
filters_tagged: &blocker.filters_tagged,
filters: &blocker.filters,

tagged_filters_all: &blocker.tagged_filters_all,

debug: blocker.debug,
enable_optimizations: blocker.enable_optimizations,
_unused: true,
_unused2: true,

resources: &blocker.resources,
},
rest: SerializeFormatRest {
simple_class_rules: &cfc.simple_class_rules,
simple_id_rules: &cfc.simple_id_rules,
complex_class_rules: &cfc.complex_class_rules,
complex_id_rules: &cfc.complex_id_rules,

specific_rules: &cfc.specific_rules,

misc_generic_selectors: &cfc.misc_generic_selectors,

scriptlets: &cfc.scriptlets,

generic_hide: &blocker.generic_hide,
},
}
}
}

impl Into<(Blocker, CosmeticFilterCache)> for DeserializeFormat {
fn into(self) -> (Blocker, CosmeticFilterCache) {
(Blocker {
csp: self.part1.csp,
exceptions: self.part1.exceptions,
importants: self.part1.importants,
redirects: self.part1.redirects,
filters_tagged: self.part1.filters_tagged,
filters: self.part1.filters,

tags_enabled: Default::default(),
tagged_filters_all: self.part1.tagged_filters_all,

hot_filters: Default::default(),

debug: self.part1.debug,
enable_optimizations: self.part1.enable_optimizations,

resources: self.part1.resources,
#[cfg(feature = "object-pooling")]
pool: Default::default(),

generic_hide: self.rest.generic_hide,
}, CosmeticFilterCache {
simple_class_rules: self.rest.simple_class_rules,
simple_id_rules: self.rest.simple_id_rules,
complex_class_rules: self.rest.complex_class_rules,
complex_id_rules: self.rest.complex_id_rules,

specific_rules: self.rest.specific_rules,

misc_generic_selectors: self.rest.misc_generic_selectors,

scriptlets: self.rest.scriptlets,
})
}
}
Loading