Skip to content

Commit 94c5911

Browse files
committed
Add a function that strips tracking query params, which are often included in shared links, from URLs
1 parent dcb507d commit 94c5911

File tree

3 files changed

+36
-3
lines changed

3 files changed

+36
-3
lines changed

Cargo.lock

Lines changed: 14 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,7 @@ htmlescape = "0.3.1"
5656
bincode = "1.3.3"
5757
base2048 = "2.0.2"
5858
revision = "0.10.0"
59+
clearurls = "0.0.4"
5960

6061

6162
[dev-dependencies]

src/utils.rs

Lines changed: 21 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ use crate::config::{self, get_setting};
66
// CRATES
77
//
88
use crate::{client::json, server::RequestExt};
9+
use clearurls::UrlCleaner;
910
use cookie::Cookie;
1011
use hyper::{Body, Request, Response};
1112
use libflate::deflate::{Decoder, Encoder};
@@ -23,6 +24,7 @@ use std::env;
2324
use std::io::{Read, Write};
2425
use std::str::FromStr;
2526
use std::string::ToString;
27+
use std::sync::Mutex;
2628
use time::{macros::format_description, Duration, OffsetDateTime};
2729
use url::Url;
2830

@@ -269,7 +271,7 @@ impl Media {
269271
(
270272
post_type.to_string(),
271273
Self {
272-
url: format_url(url_val.as_str().unwrap_or_default()),
274+
url: format_url(clean_url(url_val.as_str().unwrap_or_default()).as_str()),
273275
alt_url,
274276
// Note: in the data["is_reddit_media_domain"] path above
275277
// width and height will be 0.
@@ -1075,6 +1077,22 @@ pub fn format_url(url: &str) -> String {
10751077
}
10761078
}
10771079

1080+
// Remove tracking query params
1081+
static URL_CLEANER: Lazy<Mutex<UrlCleaner>> = Lazy::new(|| Mutex::new(UrlCleaner::from_embedded_rules().expect("Failed to initialize UrlCleaner")));
1082+
1083+
pub fn clean_url(url: &str) -> String {
1084+
let is_external_url = match Url::parse(url) {
1085+
Ok(parsed_url) => parsed_url.domain().is_some(),
1086+
_ => false,
1087+
};
1088+
let mut cleaned_url = url.to_owned();
1089+
if is_external_url {
1090+
let cleaner = URL_CLEANER.lock().unwrap();
1091+
cleaned_url = cleaner.clear_single_url_str(url).expect("Unable to clean the URL.").as_ref().to_owned();
1092+
}
1093+
cleaned_url
1094+
}
1095+
10781096
static REGEX_BULLET: Lazy<Regex> = Lazy::new(|| Regex::new(r"(?m)^- (.*)$").unwrap());
10791097
static REGEX_BULLET_CONSECUTIVE_LINES: Lazy<Regex> = Lazy::new(|| Regex::new(r"</ul>\n<ul>").unwrap());
10801098

@@ -1628,11 +1646,11 @@ fn test_rewriting_bullet_list() {
16281646
- Super resolution + Off (it looks horrible anyway)
16291647
- Sharpness 50 (default one I think)
16301648
- Black level High (low messes up gray colors)
1631-
- DFC Off
1649+
- DFC Off
16321650
- Response Time Middle (personal preference, <a href="https://www.blurbusters.com/">https://www.blurbusters.com/</a> show horrible overdrive with it on high)
16331651
- Freesync doesn&#39;t matter
16341652
- Black stabilizer 50
1635-
- Gamma setting on 0
1653+
- Gamma setting on 0
16361654
- Color Temp Medium
16371655
How`s your monitor by the way? Any IPS bleed whatsoever? I either got lucky or the panel is pretty good, 0 bleed for me, just the usual IPS glow. How about the pixels? I see the pixels even at one meter away, especially on Microsoft Edge&#39;s icon for example, the blue background is just blocky, don&#39;t know why.</p>
16381656
</div>"#;

0 commit comments

Comments
 (0)