Skip to content

Commit

Permalink
Use FxHasher for internal hash-based data structures
Browse files (browse the repository at this point in the history)
  • Loading branch information
wilsonzlin committed Jan 5, 2023
1 parent b6a0a91 commit 5989c06
Show file tree
Hide file tree
Showing 13 changed files with 65 additions and 59 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
# minify-html changelog

## Pending

- Use FxHasher for internal hash-based data structures.

## 0.10.3

- [Python] Add Python 3.11 support.
Expand Down
20 changes: 10 additions & 10 deletions gen/attrs.ts
Original file line number Diff line number Diff line change
Expand Up @@ -21,19 +21,19 @@ const rsTagAttr = ({
`
AttributeMinification {
boolean: ${boolean},
case_insensitive: ${caseInsensitive},
collapse: ${collapse},
case_insensitive: ${caseInsensitive},
collapse: ${collapse},
default_value: ${
defaultValue == undefined ? "None" : `Some(b"${defaultValue}")`
},
redundant_if_empty: ${redundantIfEmpty},
trim: ${trim},
redundant_if_empty: ${redundantIfEmpty},
trim: ${trim},
}
`;

let code = `
use lazy_static::lazy_static;
use std::collections::HashMap;
use rustc_hash::FxHashMap;
use crate::common::spec::tag::ns::Namespace;
pub struct AttributeMinification {
Expand All @@ -47,7 +47,7 @@ pub struct AttributeMinification {
pub enum AttrMapEntry {
AllNamespaceElements(AttributeMinification),
SpecificNamespaceElements(HashMap<&'static [u8], AttributeMinification>),
SpecificNamespaceElements(FxHashMap<&'static [u8], AttributeMinification>),
}
pub struct ByNamespace {
Expand All @@ -65,10 +65,10 @@ impl ByNamespace {
}
}
pub struct AttrMap(HashMap<&'static [u8], ByNamespace>);
pub struct AttrMap(FxHashMap<&'static [u8], ByNamespace>);
impl AttrMap {
pub const fn new(map: HashMap<&'static [u8], ByNamespace>) -> AttrMap {
pub const fn new(map: FxHashMap<&'static [u8], ByNamespace>) -> AttrMap {
AttrMap(map)
}
Expand All @@ -85,7 +85,7 @@ impl AttrMap {
code += `
lazy_static! {
pub static ref ATTRS: AttrMap = {
let mut m = HashMap::<&'static [u8], ByNamespace>::new();
let mut m = FxHashMap::<&'static [u8], ByNamespace>::default();
${[...Object.entries(htmlData.attributes)]
.map(
([attr_name, namespaces]) => ` m.insert(b\"${attr_name}\", ByNamespace {
Expand All @@ -108,7 +108,7 @@ ${(["html", "svg"] as const)
return `Some({
let ${
entries.length ? "mut" : ""
} m = HashMap::<&'static [u8], AttributeMinification>::new();
} m = FxHashMap::<&'static [u8], AttributeMinification>::default();
${entries
.map(
([tagName, tagAttr]) =>
Expand Down
6 changes: 3 additions & 3 deletions rust/common/spec/script.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
use lazy_static::lazy_static;
use std::collections::HashSet;
use rustc_hash::FxHashSet;

lazy_static! {
pub static ref JAVASCRIPT_MIME_TYPES: HashSet<&'static [u8]> = {
let mut s = HashSet::<&'static [u8]>::new();
pub static ref JAVASCRIPT_MIME_TYPES: FxHashSet<&'static [u8]> = {
let mut s = FxHashSet::<&'static [u8]>::default();
s.insert(b"application/ecmascript");
s.insert(b"application/javascript");
s.insert(b"application/x-ecmascript");
Expand Down
46 changes: 23 additions & 23 deletions rust/common/spec/tag/omission.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
use lazy_static::lazy_static;
use std::collections::{HashMap, HashSet};
use rustc_hash::{FxHashMap, FxHashSet};

// Rules sourced from https://html.spec.whatwg.org/multipage/syntax.html#syntax-tag-omission.
// TODO Opening tags
Expand All @@ -10,7 +10,7 @@ enum ClosingTagOmissionRuleIfLast {
// Closing tag can never be omitted if it's the last node of its parent's children.
Never,
// Closing tag can be omitted if it's the last node of its parent's children and the parent tag name is not one of these.
ParentIsNot(HashSet<&'static [u8]>),
ParentIsNot(FxHashSet<&'static [u8]>),
}

// What this means in effect while parsing:
Expand All @@ -21,22 +21,22 @@ enum ClosingTagOmissionRuleIfLast {
// - If C is in followed_by, B is closed implicitly.
struct ClosingTagOmissionRule {
// Closing tag can be omitted if immediately followed by an element node with one of these tag names.
followed_by: HashSet<&'static [u8]>,
followed_by: FxHashSet<&'static [u8]>,
// Closing tag can be omitted if it's the last node of its parent's children.
is_last: ClosingTagOmissionRuleIfLast,
}

lazy_static! {
static ref HTML_CLOSING_TAG_OMISSION_RULE: ClosingTagOmissionRule = ClosingTagOmissionRule {
followed_by: HashSet::new(),
followed_by: FxHashSet::default(),
is_last: ClosingTagOmissionRuleIfLast::Always,
};
}

lazy_static! {
static ref HEAD_CLOSING_TAG_OMISSION_RULE: ClosingTagOmissionRule = ClosingTagOmissionRule {
followed_by: {
let mut s = HashSet::<&'static [u8]>::new();
let mut s = FxHashSet::<&'static [u8]>::default();
s.insert(b"body");
s
},
Expand All @@ -46,15 +46,15 @@ lazy_static! {

lazy_static! {
static ref BODY_CLOSING_TAG_OMISSION_RULE: ClosingTagOmissionRule = ClosingTagOmissionRule {
followed_by: HashSet::new(),
followed_by: FxHashSet::default(),
is_last: ClosingTagOmissionRuleIfLast::Always,
};
}

lazy_static! {
static ref LI_CLOSING_TAG_OMISSION_RULE: ClosingTagOmissionRule = ClosingTagOmissionRule {
followed_by: {
let mut s = HashSet::<&'static [u8]>::new();
let mut s = FxHashSet::<&'static [u8]>::default();
s.insert(b"li");
s
},
Expand All @@ -65,7 +65,7 @@ lazy_static! {
lazy_static! {
static ref DT_CLOSING_TAG_OMISSION_RULE: ClosingTagOmissionRule = ClosingTagOmissionRule {
followed_by: {
let mut s = HashSet::<&'static [u8]>::new();
let mut s = FxHashSet::<&'static [u8]>::default();
s.insert(b"dt");
s.insert(b"dd");
s
Expand All @@ -77,7 +77,7 @@ lazy_static! {
lazy_static! {
static ref DD_CLOSING_TAG_OMISSION_RULE: ClosingTagOmissionRule = ClosingTagOmissionRule {
followed_by: {
let mut s = HashSet::<&'static [u8]>::new();
let mut s = FxHashSet::<&'static [u8]>::default();
s.insert(b"dd");
s.insert(b"dt");
s
Expand All @@ -88,7 +88,7 @@ lazy_static! {

lazy_static! {
static ref P_CLOSING_TAG_OMISSION_RULE: ClosingTagOmissionRule = {
let mut followed_by = HashSet::<&'static [u8]>::new();
let mut followed_by = FxHashSet::<&'static [u8]>::default();
followed_by.insert(b"address");
followed_by.insert(b"article");
followed_by.insert(b"aside");
Expand Down Expand Up @@ -120,7 +120,7 @@ lazy_static! {
followed_by.insert(b"table");
followed_by.insert(b"ul");

let mut is_last_tags = HashSet::<&'static [u8]>::new();
let mut is_last_tags = FxHashSet::<&'static [u8]>::default();
is_last_tags.insert(b"a");
is_last_tags.insert(b"audio");
is_last_tags.insert(b"del");
Expand All @@ -139,7 +139,7 @@ lazy_static! {
lazy_static! {
static ref RT_CLOSING_TAG_OMISSION_RULE: ClosingTagOmissionRule = ClosingTagOmissionRule {
followed_by: {
let mut s = HashSet::<&'static [u8]>::new();
let mut s = FxHashSet::<&'static [u8]>::default();
s.insert(b"rt");
s.insert(b"rp");
s
Expand All @@ -151,7 +151,7 @@ lazy_static! {
lazy_static! {
static ref RP_CLOSING_TAG_OMISSION_RULE: ClosingTagOmissionRule = ClosingTagOmissionRule {
followed_by: {
let mut s = HashSet::<&'static [u8]>::new();
let mut s = FxHashSet::<&'static [u8]>::default();
s.insert(b"rt");
s.insert(b"rp");
s
Expand All @@ -164,7 +164,7 @@ lazy_static! {
static ref OPTGROUP_CLOSING_TAG_OMISSION_RULE: ClosingTagOmissionRule =
ClosingTagOmissionRule {
followed_by: {
let mut s = HashSet::<&'static [u8]>::new();
let mut s = FxHashSet::<&'static [u8]>::default();
s.insert(b"optgroup");
s
},
Expand All @@ -175,7 +175,7 @@ lazy_static! {
lazy_static! {
static ref OPTION_CLOSING_TAG_OMISSION_RULE: ClosingTagOmissionRule = ClosingTagOmissionRule {
followed_by: {
let mut s = HashSet::<&'static [u8]>::new();
let mut s = FxHashSet::<&'static [u8]>::default();
s.insert(b"option");
s.insert(b"optgroup");
s
Expand All @@ -187,7 +187,7 @@ lazy_static! {
lazy_static! {
static ref THEAD_CLOSING_TAG_OMISSION_RULE: ClosingTagOmissionRule = ClosingTagOmissionRule {
followed_by: {
let mut s = HashSet::<&'static [u8]>::new();
let mut s = FxHashSet::<&'static [u8]>::default();
s.insert(b"tbody");
s.insert(b"tfoot");
s
Expand All @@ -199,7 +199,7 @@ lazy_static! {
lazy_static! {
static ref TBODY_CLOSING_TAG_OMISSION_RULE: ClosingTagOmissionRule = ClosingTagOmissionRule {
followed_by: {
let mut s = HashSet::<&'static [u8]>::new();
let mut s = FxHashSet::<&'static [u8]>::default();
s.insert(b"tbody");
s.insert(b"tfoot");
s
Expand All @@ -210,15 +210,15 @@ lazy_static! {

lazy_static! {
static ref TFOOT_CLOSING_TAG_OMISSION_RULE: ClosingTagOmissionRule = ClosingTagOmissionRule {
followed_by: HashSet::<&'static [u8]>::new(),
followed_by: FxHashSet::<&'static [u8]>::default(),
is_last: ClosingTagOmissionRuleIfLast::Always,
};
}

lazy_static! {
static ref TR_CLOSING_TAG_OMISSION_RULE: ClosingTagOmissionRule = ClosingTagOmissionRule {
followed_by: {
let mut s = HashSet::<&'static [u8]>::new();
let mut s = FxHashSet::<&'static [u8]>::default();
s.insert(b"tr");
s
},
Expand All @@ -229,7 +229,7 @@ lazy_static! {
lazy_static! {
static ref TD_CLOSING_TAG_OMISSION_RULE: ClosingTagOmissionRule = ClosingTagOmissionRule {
followed_by: {
let mut s = HashSet::<&'static [u8]>::new();
let mut s = FxHashSet::<&'static [u8]>::default();
s.insert(b"td");
s.insert(b"th");
s
Expand All @@ -241,7 +241,7 @@ lazy_static! {
lazy_static! {
static ref TH_CLOSING_TAG_OMISSION_RULE: ClosingTagOmissionRule = ClosingTagOmissionRule {
followed_by: {
let mut s = HashSet::<&'static [u8]>::new();
let mut s = FxHashSet::<&'static [u8]>::default();
s.insert(b"td");
s.insert(b"th");
s
Expand All @@ -251,8 +251,8 @@ lazy_static! {
}

lazy_static! {
static ref CLOSING_TAG_OMISSION_RULES: HashMap<&'static [u8], &'static ClosingTagOmissionRule> = {
let mut m = HashMap::<&'static [u8], &'static ClosingTagOmissionRule>::new();
static ref CLOSING_TAG_OMISSION_RULES: FxHashMap<&'static [u8], &'static ClosingTagOmissionRule> = {
let mut m = FxHashMap::<&'static [u8], &'static ClosingTagOmissionRule>::default();
m.insert(b"html", &HTML_CLOSING_TAG_OMISSION_RULE);
m.insert(b"head", &HEAD_CLOSING_TAG_OMISSION_RULE);
m.insert(b"body", &BODY_CLOSING_TAG_OMISSION_RULE);
Expand Down
6 changes: 3 additions & 3 deletions rust/common/spec/tag/void.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
use lazy_static::lazy_static;
use std::collections::HashSet;
use rustc_hash::FxHashSet;

lazy_static! {
pub static ref VOID_TAGS: HashSet<&'static [u8]> = {
let mut s = HashSet::<&'static [u8]>::new();
pub static ref VOID_TAGS: FxHashSet<&'static [u8]> = {
let mut s = FxHashSet::<&'static [u8]>::default();
s.insert(b"area");
s.insert(b"base");
s.insert(b"br");
Expand Down
10 changes: 5 additions & 5 deletions rust/common/spec/tag/whitespace.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
use crate::common::spec::tag::ns::Namespace;
use std::collections::HashMap;
use rustc_hash::FxHashMap;

use lazy_static::lazy_static;

Expand Down Expand Up @@ -60,8 +60,8 @@ static DEFAULT_SVG: &WhitespaceMinification = &WhitespaceMinification {
};

lazy_static! {
static ref HTML_TAG_WHITESPACE_MINIFICATION: HashMap<&'static [u8], &'static WhitespaceMinification> = {
let mut m = HashMap::<&'static [u8], &'static WhitespaceMinification>::new();
static ref HTML_TAG_WHITESPACE_MINIFICATION: FxHashMap<&'static [u8], &'static WhitespaceMinification> = {
let mut m = FxHashMap::<&'static [u8], &'static WhitespaceMinification>::default();
// Content tags.
m.insert(b"address", CONTENT);
m.insert(b"audio", CONTENT);
Expand Down Expand Up @@ -174,8 +174,8 @@ lazy_static! {
m
};

static ref SVG_TAG_WHITESPACE_MINIFICATION: HashMap<&'static [u8], &'static WhitespaceMinification> = {
let mut m = HashMap::<&'static [u8], &'static WhitespaceMinification>::new();
static ref SVG_TAG_WHITESPACE_MINIFICATION: FxHashMap<&'static [u8], &'static WhitespaceMinification> = {
let mut m = FxHashMap::<&'static [u8], &'static WhitespaceMinification>::default();

// Content tags.
m.insert(b"desc", CONTENT);
Expand Down
3 changes: 2 additions & 1 deletion rust/main/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ maintenance = { status = "actively-developed" }
[dependencies]
aho-corasick = "0.7"
css-minify = "0.2.2"
minify-js = "0.2.6"
lazy_static = "1.4"
memchr = "2"
minify-js = "0.2.6"
rustc-hash = "1.1.0"
4 changes: 2 additions & 2 deletions rust/main/src/ast/mod.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use std::collections::HashMap;
use rustc_hash::FxHashMap;
use std::fmt::{Debug, Formatter};
use std::str::from_utf8;

Expand Down Expand Up @@ -67,7 +67,7 @@ pub enum NodeData {
ended: bool,
},
Element {
attributes: HashMap<Vec<u8>, AttrVal>,
attributes: FxHashMap<Vec<u8>, AttrVal>,
children: Vec<NodeData>,
// If the source doesn't have a closing tag, then we can't add one, as otherwise output could be longer than source.
closing_tag: ElementClosingTag,
Expand Down
4 changes: 2 additions & 2 deletions rust/main/src/minify/element.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use std::collections::HashMap;
use rustc_hash::FxHashMap;

use crate::ast::{AttrVal, ElementClosingTag, NodeData};
use crate::cfg::Cfg;
Expand All @@ -19,7 +19,7 @@ pub fn minify_element(
// If the last node of the parent is an element and it's this one.
is_last_child_text_or_element_node: bool,
tag_name: &[u8],
attributes: HashMap<Vec<u8>, AttrVal>,
attributes: FxHashMap<Vec<u8>, AttrVal>,
closing_tag: ElementClosingTag,
children: Vec<NodeData>,
) {
Expand Down
8 changes: 4 additions & 4 deletions rust/main/src/parse/element.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use std::collections::HashMap;
use rustc_hash::FxHashMap;

use crate::ast::{AttrVal, ElementClosingTag, NodeData, ScriptOrStyleLang};
use crate::common::gen::codepoints::{
Expand Down Expand Up @@ -37,7 +37,7 @@ pub fn peek_tag_name(code: &mut Code) -> Vec<u8> {
// Derive Eq for testing.
#[derive(Eq, PartialEq)]
pub struct ParsedTag {
pub attributes: HashMap<Vec<u8>, AttrVal>,
pub attributes: FxHashMap<Vec<u8>, AttrVal>,
pub name: Vec<u8>,
pub self_closing: bool,
}
Expand All @@ -58,10 +58,10 @@ impl Debug for ParsedTag {
}

// While not valid, attributes in closing tags still need to be parsed (and then discarded) as attributes e.g. `</div x=">">`, which is why this function is used for both opening and closing tags.
// TODO Use generics to create version that doesn't create a HashMap.
// TODO Use generics to create version that doesn't create a FxHashMap.
pub fn parse_tag(code: &mut Code) -> ParsedTag {
let elem_name = parse_tag_name(code);
let mut attributes = HashMap::new();
let mut attributes = FxHashMap::default();
let self_closing;
loop {
// At the beginning of this loop, the last parsed unit was either the tag name or an attribute (including its value, if it had one).
Expand Down

0 comments on commit 5989c06

Please sign in to comment.