Don't compile the regexes each time

This commit is contained in:
Daniel García 2020-07-14 21:58:27 +02:00
parent 668d5c23dc
commit f14e19a3d8
No known key found for this signature in database
GPG Key ID: FC8A7D14C3CD543A
1 changed files with 8 additions and 3 deletions

View File

@ -30,6 +30,11 @@ static CLIENT: Lazy<Client> = Lazy::new(|| {
.unwrap()
});
static ICON_REL_REGEX: Lazy<Regex> = Lazy::new(|| Regex::new(r"icon$|apple.*icon").unwrap());
static ICON_HREF_REGEX: Lazy<Regex> =
Lazy::new(|| Regex::new(r"(?i)\w+\.(jpg|jpeg|png|ico)(\?.*)?$|^data:image.*base64").unwrap());
static ICON_SIZE_REGEX: Lazy<Regex> = Lazy::new(|| Regex::new(r"(?x)(\d+)\D*(\d+)").unwrap());
fn is_valid_domain(domain: &str) -> bool {
// Don't allow empty or too big domains or path traversal
if domain.is_empty() || domain.len() > 255 || domain.contains("..") {
@ -235,8 +240,8 @@ fn get_icon_url(domain: &str) -> Result<(Vec<Icon>, String), Error> {
// Search for and filter
let favicons = soup
.tag("link")
.attr("rel", Regex::new(r"icon$|apple.*icon")?) // Only use icon rels
.attr("href", Regex::new(r"(?i)\w+\.(jpg|jpeg|png|ico)(\?.*)?$|^data:image.*base64")?) // Only allow specific extensions
.attr("rel", ICON_REL_REGEX.clone()) // Only use icon rels
.attr("href", ICON_HREF_REGEX.clone()) // Only allow specific extensions
.find_all();
// Loop through all the found icons and determine it's priority
@ -348,7 +353,7 @@ fn parse_sizes(sizes: Option<String>) -> (u16, u16) {
let mut height: u16 = 0;
if let Some(sizes) = sizes {
match Regex::new(r"(?x)(\d+)\D*(\d+)").unwrap().captures(sizes.trim()) {
match ICON_SIZE_REGEX.captures(sizes.trim()) {
None => {}
Some(dimensions) => {
if dimensions.len() >= 3 {