From 1a8ec047335756ac24830f0fb51a0e2cf5b05264 Mon Sep 17 00:00:00 2001 From: BlackDex Date: Thu, 10 Dec 2020 23:13:24 +0100 Subject: [PATCH] Small update on favicon downloading - Changed the user-agent, which caused at least one site to stall the connection (Same happens on icons.bitwarden.com) - Added default_header creation to the lazy static CLIENT - Added referer passing, which is checked by some sites - Some small other changes --- src/api/icons.rs | 90 +++++++++++++++++++++++++----------------------- 1 file changed, 47 insertions(+), 43 deletions(-) diff --git a/src/api/icons.rs b/src/api/icons.rs index 9c3e779..60e5fc0 100644 --- a/src/api/icons.rs +++ b/src/api/icons.rs @@ -9,7 +9,7 @@ use std::{ use once_cell::sync::Lazy; use regex::Regex; -use reqwest::{blocking::Client, blocking::Response, header::HeaderMap, Url}; +use reqwest::{blocking::Client, blocking::Response, header, Url}; use rocket::{http::ContentType, http::Cookie, response::Content, Route}; use soup::prelude::*; @@ -22,10 +22,18 @@ pub fn routes() -> Vec { const ALLOWED_CHARS: &str = "_-."; static CLIENT: Lazy = Lazy::new(|| { + // Generate the default headers + let mut default_headers = header::HeaderMap::new(); + default_headers.insert(header::USER_AGENT, header::HeaderValue::from_static("Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_4) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/13.1.1 Safari/605.1.15")); + default_headers.insert(header::ACCEPT_LANGUAGE, header::HeaderValue::from_static("en-US,en;q=0.8")); + default_headers.insert(header::CACHE_CONTROL, header::HeaderValue::from_static("no-cache")); + default_headers.insert(header::PRAGMA, header::HeaderValue::from_static("no-cache")); + default_headers.insert(header::ACCEPT, header::HeaderValue::from_static("text/html,application/xhtml+xml,application/xml; q=0.9,image/webp,image/apng,*/*;q=0.8")); + // Reuse the client between requests Client::builder() .timeout(Duration::from_secs(CONFIG.icon_download_timeout())) - .default_headers(_header_map()) + .default_headers(default_headers) .build() .unwrap() }); @@ -324,6 +332,12 @@ impl Icon { } } +struct IconUrlResult { + iconlist: Vec, + cookies: String, + referer: String, +} + /// Returns a Result/Tuple which holds a Vector IconList and a string which holds the cookies from the last response. /// There will always be a result with a string which will contain https://example.com/favicon.ico and an empty string for the cookies. /// This does not mean that that location does exists, but it is the default location browser use. @@ -336,19 +350,11 @@ impl Icon { /// let (mut iconlist, cookie_str) = get_icon_url("github.com")?; /// let (mut iconlist, cookie_str) = get_icon_url("gitlab.com")?; /// ``` -fn get_icon_url(domain: &str) -> Result<(Vec, String), Error> { +fn get_icon_url(domain: &str) -> Result { // Default URL with secure and insecure schemes let ssldomain = format!("https://{}", domain); let httpdomain = format!("http://{}", domain); - // Create the iconlist - let mut iconlist: Vec = Vec::new(); - - // Create the cookie_str to fill it all the cookies from the response - // These cookies can be used to request/download the favicon image. - // Some sites have extra security in place with for example XSRF Tokens. - let mut cookie_str = String::new(); - // First check the domain as given during the request for both HTTPS and HTTP. let resp = match get_page(&ssldomain).or_else(|_| get_page(&httpdomain)) { Ok(c) => Ok(c), @@ -388,6 +394,15 @@ fn get_icon_url(domain: &str) -> Result<(Vec, String), Error> { } }; + // Create the iconlist + let mut iconlist: Vec = Vec::new(); + + // Create the cookie_str to fill it all the cookies from the response + // These cookies can be used to request/download the favicon image. + // Some sites have extra security in place with for example XSRF Tokens. + let mut cookie_str = "".to_string(); + let mut referer = "".to_string(); + if let Ok(content) = resp { // Extract the URL from the respose in case redirects occured (like @ gitlab.com) let url = content.url().clone(); @@ -407,6 +422,10 @@ fn get_icon_url(domain: &str) -> Result<(Vec, String), Error> { }) .collect::(); + // Set the referer to be used on the final request, some sites check this. + // Mostly used to prevent direct linking and other security resons. + referer = url.as_str().to_string(); + // Add the default favicon.ico to the list with the domain the content responded from. iconlist.push(Icon::new(35, url.join("/favicon.ico").unwrap().into_string())); @@ -446,21 +465,28 @@ fn get_icon_url(domain: &str) -> Result<(Vec, String), Error> { iconlist.sort_by_key(|x| x.priority); // There always is an icon in the list, so no need to check if it exists, and just return the first one - Ok((iconlist, cookie_str)) + Ok(IconUrlResult{ + iconlist, + cookies: cookie_str, + referer + }) } fn get_page(url: &str) -> Result { - get_page_with_cookies(url, "") + get_page_with_cookies(url, "", "") } -fn get_page_with_cookies(url: &str, cookie_str: &str) -> Result { +fn get_page_with_cookies(url: &str, cookie_str: &str, referer: &str) -> Result { if is_domain_blacklisted(Url::parse(url).unwrap().host_str().unwrap_or_default()) { err!("Favicon rel linked to a blacklisted domain!"); } let mut client = CLIENT.get(url); if !cookie_str.is_empty() { - client = client.header("cookie", cookie_str) + client = client.header("Cookie", cookie_str) + } + if !referer.is_empty() { + client = client.header("Referer", referer) } client.send()? @@ -493,7 +519,7 @@ fn get_icon_priority(href: &str, sizes: Option) -> u8 { 1 } else if width == 64 { 2 - } else if width >= 24 && width <= 128 { + } else if (24..=128).contains(&width) { 3 } else if width == 16 { 4 @@ -552,13 +578,13 @@ fn download_icon(domain: &str) -> Result, Error> { err!("Domain is blacklisted", domain) } - let (iconlist, cookie_str) = get_icon_url(&domain)?; + let icon_result = get_icon_url(&domain)?; let mut buffer = Vec::new(); use data_url::DataUrl; - for icon in iconlist.iter().take(5) { + for icon in icon_result.iconlist.iter().take(5) { if icon.href.starts_with("data:image") { let datauri = DataUrl::process(&icon.href).unwrap(); // Check if we are able to decode the data uri @@ -573,13 +599,13 @@ fn download_icon(domain: &str) -> Result, Error> { _ => warn!("data uri is invalid"), }; } else { - match get_page_with_cookies(&icon.href, &cookie_str) { + match get_page_with_cookies(&icon.href, &icon_result.cookies, &icon_result.referer) { Ok(mut res) => { info!("Downloaded icon from {}", icon.href); res.copy_to(&mut buffer)?; break; - } - Err(_) => warn!("Download failed for {}", icon.href), + }, + _ => warn!("Download failed for {}", icon.href), }; } } @@ -604,25 +630,3 @@ fn save_icon(path: &str, icon: &[u8]) { } } } - -fn _header_map() -> HeaderMap { - // Set some default headers for the request. - // Use a browser like user-agent to make sure most websites will return there correct website. - use reqwest::header::*; - - macro_rules! headers { - ($( $name:ident : $value:literal),+ $(,)? ) => { - let mut headers = HeaderMap::new(); - $( headers.insert($name, HeaderValue::from_static($value)); )+ - headers - }; - } - - headers! { - USER_AGENT: "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36 Edge/16.16299", - ACCEPT_LANGUAGE: "en-US,en;q=0.8", - CACHE_CONTROL: "no-cache", - PRAGMA: "no-cache", - ACCEPT: "text/html,application/xhtml+xml,application/xml; q=0.9,image/webp,image/apng,*/*;q=0.8", - } -}