Skip to content

Commit

Permalink
Split minify passes into separate files
Browse files Browse the repository at this point in the history
  • Loading branch information
wilsonzlin committed Mar 6, 2023
1 parent 93757fe commit 3b97569
Show file tree
Hide file tree
Showing 9 changed files with 1,193 additions and 1,127 deletions.
2 changes: 1 addition & 1 deletion rust/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ version = "0.5.6"
[dependencies]
aho-corasick = "0.7"
lazy_static = "1.4"
parse-js = "0.18"
parse-js = "0.19"

[features]
serialize = ["parse-js/serialize"]
9 changes: 6 additions & 3 deletions rust/src/emit/tests/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,14 @@ use crate::TopLevelMode;
use parse_js::lex::Lexer;
use parse_js::parse::Parser;
use parse_js::session::Session;
use parse_js::symbol::SymbolGenerator;

fn check(top_level_mode: TopLevelMode, src: &str, expected: &str) -> () {
let session = Session::new();
let mut parser = Parser::new(Lexer::new(src.as_bytes()));
let node = parser.parse_top_level(&session, top_level_mode).unwrap();
let node = parser
.parse_top_level(&session, SymbolGenerator::new(), top_level_mode)
.unwrap();
let mut out = Vec::new();
minify_js(&session, node);
emit_js(&mut out, node);
Expand Down Expand Up @@ -215,9 +218,9 @@ fn test_emit_jsx() {
r#"
import CompImp from "./comp";
let div = {a:"div"};
let U = {a:"div"};
const CompLocal = () => <div.a><strong/></div.a>;
const CompLocal = () => <U.a><strong/></U.a>;
render(<CompImp><CompLocal/></CompImp>);
"#,
Expand Down
194 changes: 194 additions & 0 deletions rust/src/minify/advanced_if.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,194 @@
use parse_js::ast::new_node;
use parse_js::ast::Node;
use parse_js::ast::NodeData;
use parse_js::ast::Syntax;
use parse_js::ast::VarDeclMode;
use parse_js::operator::OperatorName;
use parse_js::session::Session;
use parse_js::session::SessionVec;
use parse_js::source::SourceRange;
use parse_js::symbol::Scope;

// If statement optimisation:
// - `if (a) { b }` => `a && b` if `b` can be reduced to a single expression.
// - `if (a) { b } else { c }` => `a ? b : c` if `b` and `c` can be reduced to a single expression.
// - `if (a) { b; return c }` => `if (a) return b, c` if `b` can be reduced to a single expression.
// The last form is more for normalisation: it doesn't minify much by itself (it still remains a statement), but allows a containing `if` to optimise `if (a) { b; return c } d; return e` into `return a ? (b, c) : (d, e)` if `b` and `d` can be reduced to a single expression. Otherwise, we wouldn't be able to minify the containing `if`.
// Note that it's not possible for both branches to return, as a previous pass should have already unwrapped the unnecessary block. We also normalise it such that if only `else` returns, it's flipped, and then the `else` can be unwrapped.

// We only perform advanced statement analysis and transformation to expression in `if` and `else` blocks as that will allow opportunities to transform `if` into logical expressions. This isn't useful elsewhere, as a sequence of expression statements is the same size as a sequence of expressions separated by commas, so the fact it's an expression is not being leveraged.

// We first perform some analysis to see if it's even worthwhile to perform this optimisation.
/// Decides whether an `if`/`else` branch can be collapsed into a single expression (optionally
/// wrapped in one `return`) by `process_if_branch`.
///
/// `stx` must be a `Syntax::BlockStmt`; anything else hits `unreachable!()`.
///
/// Returns `true` only when every statement in the block is one of:
/// - a `var` declaration whose declarators all bind plain identifiers,
/// - an expression statement,
/// - a `return` statement (which must be the last statement of the block),
/// - an `if` whose consequent is directly a `return` and which has no `else`,
///
/// and additionally:
/// - at least one statement contributes an expression (otherwise there is nothing to fold and
///   the processing step would panic when reducing an empty list), and
/// - any `if (..) return` is accompanied by a block-level `return`.
pub fn analyse_if_branch<'a>(stx: &Syntax<'a>) -> bool {
    let Syntax::BlockStmt { body } = stx else {
        // We should have already normalised all `if` branches into a block if they were single statements, so this should not be possible.
        unreachable!();
    };
    let mut block_returned = false;
    let mut if_returned = false;
    // Whether processing this block would emit at least one expression. An empty block, or one
    // containing only `var x;` style declarations, emits none and cannot be represented.
    let mut yields_expression = false;
    for stmt in body.iter() {
        // Anything following an unconditional `return` is dead code. Folding it into the comma
        // expression would change the returned value (and a trailing `if (..) return` would be
        // processed against an empty remainder), so refuse to optimise such blocks.
        if block_returned {
            return false;
        }
        match &stmt.stx {
            Syntax::VarDecl {
                mode, declarators, ..
            } => {
                match mode {
                    // We can make `var` declarations into expressions by hoisting the declaration part and leaving behind an assignment expression (if an initialiser exists).
                    // TODO Support non-identifier patterns, although they may not be worth minifying if we have to hoist and therefore duplicate the variable names.
                    VarDeclMode::Var => {
                        for d in declarators.iter() {
                            if !matches!(d.pattern.stx, Syntax::IdentifierPattern { .. }) {
                                return false;
                            }
                            // Only declarators with an initialiser leave an assignment behind.
                            yields_expression |= d.initializer.is_some();
                        }
                    }
                    // TODO We currently disallow if `let` or `const`, however there is a complex approach we could consider in the future: they're scoped to the block, so we can either wrap our optimised expression in a block or create a unique variable in the nearest closure and hoist it like `var`. Since the former means we're still left with a (block) statement, we choose the latter, but that means we can't do this if we're in the global scope as we're not allowed to introduce global variables (even if they're very unlikely to collide in reality). We'd have to replace all usages of these variables, however.
                    VarDeclMode::Const | VarDeclMode::Let => return false,
                };
            }
            Syntax::ExpressionStmt { .. } => yields_expression = true,
            Syntax::ReturnStmt { .. } => {
                block_returned = true;
                // A bare `return` still contributes an `undefined` expression when processed.
                yields_expression = true;
            }
            // Since we perform this optimisation bottom-up, any IfStmt should already be optimised, so if they were optimised and still exist as a statement, they should only have exactly one statement of `return` in `if` and `else`.
            Syntax::IfStmt {
                consequent: NodeData {
                    stx: Syntax::ReturnStmt { .. },
                    ..
                },
                alternate: None,
                ..
            } => if_returned = true,
            // Debugger and empty statements should already be removed.
            _ => return false,
        };
    }
    // We must only be left with at most one return statement (i.e. unconditional, although value can be conditional). Essentially, this means that if we have an `if (x) return`, we must have a block-level return, as otherwise we cannot represent it as a single `return`.
    yields_expression && (!if_returned || block_returned)
}

/// Result of collapsing the statements of an `if` branch into one expression.
pub struct ProcessedIfBranch<'a> {
    /// The single expression that replaces the branch's statements (multiple statements are joined with the comma operator).
    pub expression: Node<'a>,
    /// Names of `var` declarations that were stripped out of the branch while building `expression`.
    /// NOTE(review): presumably the caller must re-declare these in the enclosing closure — confirm against the call site.
    pub hoisted_vars: SessionVec<'a, SourceRange<'a>>,
    /// If true, it means that it's not an expression, but a single return statement with the expression as the return value.
    pub returns: bool,
}

fn process_if_branch_block<'a, 'b>(
session: &'a Session,
scope: Scope<'a>,
body: &'b mut [Node<'a>],
) -> ProcessedIfBranch<'a> {
let mut returns = false;
let mut hoisted_vars: SessionVec<'a, SourceRange<'a>> = session.new_vec();
let mut expressions: SessionVec<'a, Node<'a>> = session.new_vec();
let mut i = 0;
while i < body.len() {
let loc = body[i].loc;
let scope = body[i].scope;
match &mut body[i].stx {
Syntax::ExpressionStmt { expression } => {
expressions.push(expression.take(session));
}
Syntax::ReturnStmt { value } => {
returns = true;
expressions.push(match value {
Some(value) => value.take(session),
None => new_node(session, scope, loc, Syntax::IdentifierExpr {
name: SourceRange::from_slice(b"undefined"),
}),
});
}
Syntax::VarDecl {
declarators,
mode: VarDeclMode::Var,
..
} => {
for decl in declarators.iter_mut() {
let target = decl.pattern.take(session);
let Syntax::IdentifierPattern { name } = target.stx else {
unreachable!();
};
hoisted_vars.push(name);
if let Some(init) = &mut decl.initializer {
let right = init.take(session);
expressions.push(new_node(
session,
scope,
name + init.loc,
Syntax::BinaryExpr {
parenthesised: false,
operator: OperatorName::Assignment,
left: target,
right,
},
));
}
}
}
Syntax::IfStmt {
test,
consequent:
NodeData {
stx: Syntax::ReturnStmt { value },
loc: ret_loc,
..
},
alternate: None,
} => {
returns = true;

// Take before we reborrow mutably for process_if_branch_block.
let test = test.take(session);
let consequent = value.as_mut().map(|v| v.take(session)).unwrap_or(new_node(
session,
scope,
*ret_loc,
Syntax::IdentifierExpr {
name: SourceRange::from_slice(b"undefined"),
},
));

let mut remaining = process_if_branch_block(session, scope, &mut body[i + 1..]);
assert!(remaining.returns);
hoisted_vars.append(&mut remaining.hoisted_vars);
let alternate = remaining.expression;
expressions.push(new_node(session, scope, loc, Syntax::ConditionalExpr {
parenthesised: false,
test,
consequent,
alternate,
}));
break;
}
_ => unreachable!(),
};
i += 1;
}

ProcessedIfBranch {
expression: expressions
.into_iter()
.reduce(|left, right| {
new_node(session, scope, left.loc + right.loc, Syntax::BinaryExpr {
parenthesised: false,
operator: OperatorName::Comma,
left,
right,
})
})
.unwrap(),
hoisted_vars,
returns,
}
}

/// Collapses an `if`/`else` branch into a single expression by delegating to
/// `process_if_branch_block` with the statements of the branch's block.
///
/// # Panics
///
/// Panics if `branch` is not a `Syntax::BlockStmt`.
pub fn process_if_branch<'a, 'b>(
    session: &'a Session,
    scope: Scope<'a>,
    branch: &'b mut NodeData<'a>,
) -> ProcessedIfBranch<'a> {
    match &mut branch.stx {
        Syntax::BlockStmt { body } => process_if_branch_block(session, scope, body),
        // Every `if` branch is expected to have been normalised into a block beforehand, even
        // when it held a single statement, so no other syntax should reach this point.
        _ => unreachable!(),
    }
}
68 changes: 68 additions & 0 deletions rust/src/minify/ctx.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
use parse_js::ast::Node;
use parse_js::session::Session;
use parse_js::session::SessionHashMap;
use parse_js::session::SessionHashSet;
use parse_js::session::SessionVec;
use parse_js::source::SourceRange;
use parse_js::symbol::Identifier;
use parse_js::symbol::Scope;
use parse_js::symbol::Symbol;

/// Our additional state that's associated with each Symbol.
#[derive(Default)]
pub struct MinifySymbol<'a> {
    /// NOTE(review): presumably the shortened name chosen for this symbol, `None` until one is assigned — confirm against the renaming pass.
    pub minified_name: Option<SourceRange<'a>>,
    /// NOTE(review): presumably set when the symbol is referenced as a JSX element name — confirm where this is written.
    pub is_used_as_jsx_component: bool,
    /// Set to true by `Ctx::track_variable_usage` when a usage resolves to this symbol.
    pub has_usage: bool,
    /// If this is true, and this symbol is associated with a function, don't transform the function into an arrow function, even if it doesn't use `this`.
    pub is_used_as_constructor: bool,
    /// Similar to `is_used_as_constructor`, although a weaker signal, since the presence of `prototype` is highly likely to mean it's a constructor function, but not as certain as `new`.
    pub has_prototype: bool,
}

/// Our additional state that's associated with each Scope.
pub struct MinifyScope<'a> {
    /// Variables that are declared by an ancestor (not own) scope (or is not declared anywhere and assumed to be global), and used by code in own or any descendant scope.
    pub inherited_vars: SessionHashSet<'a, Identifier<'a>>,
    /// Function declarations within this closure-like scope that must be hoisted to declarations at the very beginning of this closure's code (so we can transform them to `var` and still have them work correctly). There may be multiple closures with the same name, nested deep with many blocks and branches, which is why we use a map; the last visited (lexical) declaration wins. Note that this is only populated if this scope is a closure; function declarations don't hoist to blocks.
    /// Since they could be deep and anywhere, we must take them and move them into this map; we can't just look at a BlockStmt's children as they may not always be there.
    pub hoisted_functions: SessionHashMap<'a, Identifier<'a>, Node<'a>>,
    /// `var` declarations in this closure that need to be moved to allow for some optimisation.
    pub hoisted_vars: SessionVec<'a, Identifier<'a>>,
}

impl<'a> MinifyScope<'a> {
pub fn new(session: &'a Session) -> MinifyScope<'a> {
MinifyScope {
inherited_vars: session.new_hashset(),
hoisted_functions: session.new_hashmap(),
hoisted_vars: session.new_vec(),
}
}
}

/// Shared state handed to the minifier: the session plus mutable access to the per-symbol and
/// per-scope side tables.
pub struct Ctx<'a, 'b> {
    /// Session used to create new per-scope state (see `MinifyScope::new`).
    pub session: &'a Session,
    /// Minifier state for each `Symbol`; entries are created on demand with `Default`.
    pub symbols: &'b mut SessionHashMap<'a, Symbol, MinifySymbol<'a>>,
    /// Minifier state for each `Scope`; entries are created on demand with `MinifyScope::new`.
    pub scopes: &'b mut SessionHashMap<'a, Scope<'a>, MinifyScope<'a>>,
}

impl<'a, 'b> Ctx<'a, 'b> {
    /// Records that `name` is used from within `scope`.
    ///
    /// Walks from `scope` up through its ancestors. The first scope that has a symbol for `name`
    /// gets that symbol flagged as used and the walk stops. Every scope visited before that
    /// point (or every scope up to the root, when no scope has the symbol) has `name` added to
    /// its `inherited_vars`.
    ///
    /// See [notes/Name minification.md] for the algorithm in more detail.
    pub fn track_variable_usage(&mut self, scope: Scope<'a>, name: Identifier<'a>) {
        let session = self.session;
        let mut next = Some(scope);
        while let Some(current) = next {
            match current.get_symbol(name) {
                Some(symbol) => {
                    // Found the declaring scope: mark the symbol and finish.
                    self.symbols.entry(symbol).or_default().has_usage = true;
                    return;
                }
                None => {
                    // Not declared here, so this scope inherits the name from further up.
                    let state = self
                        .scopes
                        .entry(current)
                        .or_insert_with(|| MinifyScope::new(session));
                    state.inherited_vars.insert(name);
                    next = current.parent();
                }
            }
        }
    }
}
Loading

0 comments on commit 3b97569

Please sign in to comment.