Skip to content

Commit

Permalink
Merge pull request #16 from javiersuweijie/main
Browse files Browse the repository at this point in the history
chore: upstreamed changes from zkemail
  • Loading branch information
Divide-By-0 committed Sep 28, 2023
2 parents c2d71de + 783d28b commit 341da24
Show file tree
Hide file tree
Showing 6 changed files with 250 additions and 353 deletions.
2 changes: 1 addition & 1 deletion compiler/cli.js
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,6 @@ program.on('command:*', () => {

program.parse(process.argv);

// process.argv always starts with the node executable and the script path, so
// its length is never 0. Only the entries after those two are arguments typed
// by the user; print the help text when there are none.
if (process.argv.slice(2).length === 0) {
  program.help();
}
13 changes: 3 additions & 10 deletions compiler/gen.js
Original file line number Diff line number Diff line change
@@ -1,20 +1,13 @@
const fs = require('fs');
const path = require('path');
const regexpTree = require('regexp-tree');
const assert = require('assert');
const lexical = require('./lexical');
const regex_to_dfa = require('./regex_to_dfa');


async function generateCircuit(regex, circuitLibPath, circuitName) {
const ast = regexpTree.parse(`/${regex}/`);
regexpTree.traverse(ast, {
'*': function ({ node }) {
if (node.type === 'CharacterClass') {
throw new Error('CharacterClass not supported');
}
},
});

const graph_json = lexical.compile(regex);
const graph_json = JSON.parse(regex_to_dfa.regexToGraph(regex_to_dfa.regexToMinDFASpec(regex)));
const N = graph_json.length;

// Outgoing nodes
Expand Down
130 changes: 29 additions & 101 deletions compiler/lexical.js
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,9 @@ function parseRegex(text) {
let i = 0;
while (i < text.length) {
if (text[i] == '\\') {
new_text.push([text[i+1]]);
const escapeMap = { n: '\n', r: '\r', t: '\t', v: '\v', f: '\f', '^': String.fromCharCode(128) };
const char = text[i + 1];
new_text.push([escapeMap[char] || char]);
i += 2;
} else {
new_text.push(text[i]);
Expand All @@ -152,11 +154,11 @@ function parseRegex(text) {
}

/**
* Convert regular expression to nondeterministic finite automaton.
*
* @param {string} text @see parseRegex()
* @return {object|string}
*/
* Convert regular expression to nondeterministic finite automaton.
*
* @param {string} text @see parseRegex()
* @return {object|string}
*/
function regexToNfa(text) {
'use strict';
function generateGraph(node, start, end, count) {
Expand Down Expand Up @@ -214,11 +216,11 @@ function regexToNfa(text) {
}

/**
* Convert nondeterministic finite automaton to deterministic finite automaton.
*
* @param {object} nfa @see regexToNfa(), the function assumes that the given NFA is valid.
* @return {object} dfa Returns the first element of the DFA.
*/
* Convert nondeterministic finite automaton to deterministic finite automaton.
*
* @param {object} nfa @see regexToNfa(), the function assumes that the given NFA is valid.
* @return {object} dfa Returns the first element of the DFA.
*/
function nfaToDfa(nfa) {
'use strict';
function getClosure(nodes) {
Expand All @@ -237,6 +239,11 @@ function nfaToDfa(nfa) {
}
while (stack.length > 0) {
top = stack.pop();
// If top is of type string and starts with "Error" then return error
if (typeof top === 'string' && top[0] === 'E') {
console.log(top);
continue;
}
for (i = 0; i < top.edges.length; i += 1) {
if (top.edges[i][0] === 'ϵ') {
if (closure.indexOf(top.edges[i][1]) < 0) {
Expand Down Expand Up @@ -325,11 +332,11 @@ function nfaToDfa(nfa) {
}

/**
* Convert the DFA to its minimum form using Hopcroft's algorithm.
*
* @param {object} dfa @see nfaToDfa(), the function assumes that the given DFA is valid.
* @return {object} dfa Returns the first element of the minimum DFA.
*/
* Convert the DFA to its minimum form using Hopcroft's algorithm.
*
* @param {object} dfa @see nfaToDfa(), the function assumes that the given DFA is valid.
* @return {object} dfa Returns the first element of the minimum DFA.
*/
function minDfa(dfa) {
'use strict';
function getReverseEdges(start) {
Expand Down Expand Up @@ -514,88 +521,9 @@ function minDfa(dfa) {
return buildMinNfa(dfa, partitions, idMap, revEdges);
}

/**
 * Convert a state id into a number.
 *
 * Ids whose first character is a digit 1-9 are parsed as base-10 integers.
 * Any other id is read as bijective base-26 over the letters A-Z
 * ('A' -> 1, 'Z' -> 26, 'AA' -> 27); characters outside A-Z contribute 0.
 *
 * @param {string} col State id.
 * @return {number} Numeric value of the id (0 for an empty id).
 */
function toNature(col) {
  const ALPHABET = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ';
  const lead = col[0];

  if (lead >= '1' && lead <= '9') {
    return parseInt(col, 10);
  }

  let total = 0;
  for (let pos = 0; pos < col.length; pos += 1) {
    // The leftmost letter carries the highest power of 26.
    const weight = Math.pow(ALPHABET.length, col.length - 1 - pos);
    total += weight * (ALPHABET.indexOf(col[pos]) + 1);
  }
  return total;
}

// '(\r\n|\x80)(to|from):([A-Za-z0-9 _."@-]+<)?[a-zA-Z0-9_.-]+@[a-zA-Z0-9_.]+>?\r\n';
// let regex = '(\r\n|\x80)(to|from):((a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z|A|B|C|D|E|F|G|H|I|J|K|L|M|N|O|P|Q|R|S|T|U|V|W|X|Y|Z|0|1|2|3|4|5|6|7|8|9| |_|.|"|@|-)+<)?(a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z|A|B|C|D|E|F|G|H|I|J|K|L|M|N|O|P|Q|R|S|T|U|V|W|X|Y|Z|0|1|2|3|4|5|6|7|8|9|_|.|-)+@(a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z|A|B|C|D|E|F|G|H|I|J|K|L|M|N|O|P|Q|R|S|T|U|V|W|X|Y|Z|0|1|2|3|4|5|6|7|8|9|_|.|-)+>?\r\n';

// Reusable regex fragments. Each one is a single parenthesised alternation
// with one alternative per character, i.e. a character class spelled out
// without [] syntax.

// Lowercase letters a-z.
const key_chars = '(a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z)';
// Digits, letters, punctuation, space and the control characters \t \n \r \x0b \x0c.
// The characters ( ) * + ? \ and | are written with a leading backslash in the
// resulting regex text; the other punctuation characters are listed bare.
const catch_all = '(0|1|2|3|4|5|6|7|8|9|a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z|A|B|C|D|E|F|G|H|I|J|K|L|M|N|O|P|Q|R|S|T|U|V|W|X|Y|Z|!|"|#|$|%|&|\'|\\(|\\)|\\*|\\+|,|-|.|/|:|;|<|=|>|\\?|@|[|\\\\|]|^|_|`|{|\\||}|~| |\t|\n|\r|\x0b|\x0c)';
// Same alternatives as catch_all with ';' left out.
const catch_all_without_semicolon = '(0|1|2|3|4|5|6|7|8|9|a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z|A|B|C|D|E|F|G|H|I|J|K|L|M|N|O|P|Q|R|S|T|U|V|W|X|Y|Z|!|"|#|$|%|&|\'|\\(|\\)|\\*|\\+|,|-|.|/|:|<|=|>|\\?|@|[|\\\\|]|^|_|`|{|\\||}|~| |\t|\n|\r|\x0b|\x0c)';
// Letters, digits and the characters + / = (the '+' is backslash-escaped).
const base_64 = '(a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z|A|B|C|D|E|F|G|H|I|J|K|L|M|N|O|P|Q|R|S|T|U|V|W|X|Y|Z|0|1|2|3|4|5|6|7|8|9|\\+|/|=)';
// Letters, digits and underscore.
const word_char = '(a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z|A|B|C|D|E|F|G|H|I|J|K|L|M|N|O|P|Q|R|S|T|U|V|W|X|Y|Z|0|1|2|3|4|5|6|7|8|9|_)';


/**
 * Compile a regex into the transition table of its minimised DFA.
 *
 * The regex goes through regexToNfa -> nfaToDfa -> minDfa. Every state
 * reachable from the resulting start state is then visited and given a
 * 1-based `nature` number derived from its id by toNature().
 *
 * Side effect: the `nature` property is written onto each visited state.
 *
 * @param {string} regex Regex text in the form accepted by regexToNfa().
 * @return {Array<{type: *, edges: Object<string, number>}>} Array in which the
 *   state with nature k sits at index k - 1; `edges` maps each transition
 *   symbol to the zero-based index of the target state.
 */
function compile(regex) {
  const startState = minDfa(nfaToDfa(regexToNfa(regex)));

  const visited = {};
  const ordered = [];
  const alphabet = [];
  const pending = [startState];

  // Depth-first walk over everything reachable from the start state.
  while (pending.length > 0) {
    const state = pending.pop();
    if (Object.prototype.hasOwnProperty.call(visited, state.id)) {
      continue;
    }
    visited[state.id] = state;
    state.nature = toNature(state.id);
    ordered.push(state);

    for (const edge of state.edges) {
      const symbol = edge[0];
      // Epsilon edges are followed but never become table columns.
      if (symbol !== 'ϵ' && alphabet.indexOf(symbol) < 0) {
        alphabet.push(symbol);
      }
      pending.push(edge[1]);
    }
  }

  ordered.sort((left, right) => left.nature - right.nature);
  alphabet.sort();

  const graph = [];
  for (const state of ordered) {
    const edges = {};
    for (const symbol of alphabet) {
      if (Object.prototype.hasOwnProperty.call(state.trans, symbol)) {
        edges[symbol] = state.trans[symbol].nature - 1;
      }
    }
    graph[state.nature - 1] = { type: state.type, edges };
  }

  return graph;
}

module.exports = {
compile,
key_chars,
base_64,
word_char,
catch_all,
catch_all_without_semicolon,
};
if (typeof require === 'function') {
exports.parseRegex = parseRegex;
exports.regexToNfa = regexToNfa;
exports.nfaToDfa = nfaToDfa;
exports.minDfa = minDfa;
}
193 changes: 193 additions & 0 deletions compiler/regex_to_dfa.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,193 @@
/*jslint browser: true*/
const { minDfa, nfaToDfa, regexToNfa } = require('./lexical');

/** This section defines helper regex components -- to edit the regex used, edit the return
* of the test_regex function.
* All of the relevant regexes are in the main repo README.
*/

// Helper components

// Expansions for the range shorthands recognised by regexToMinDFASpec. Each
// string lists the characters of the range back to back, with no separator.
const a2z_nosep = 'abcdefghijklmnopqrstuvwxyz';
const A2Z_nosep = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ';
const a2f_nosep = 'abcdef';
const A2F_nosep = 'ABCDEF';
const r0to9_nosep = '0123456789';

// TODO: Note that this is replicated code in lexical.js as well
// Note that ^ has to be manually replaced with \x80 in the regex
// Maps the letter that follows a backslash to the control character it denotes.
const escapeMap = { n: '\n', r: '\r', t: '\t', v: '\v', f: '\f' };
// The control characters themselves, in the key order of escapeMap.
const whitespace = Object.values(escapeMap);
// The whitespace characters joined with '|': the text substituted for \s.
const slash_s = whitespace.join('|');

/**
 * Rewrite a regex that uses [] classes and shorthands into the expanded form
 * expected by the min-DFA tooling (https://zkregex.com/min_dfa).
 *
 * Step 1 is a plain text substitution applied anywhere in the string: the
 * ranges A-Z, a-z, A-F, a-f and 0-9 and the shorthands \w, \d and \s are
 * replaced by their spelled-out characters. Only those five ranges are
 * recognised; any other range (for example a-c) is left untouched, so it is
 * very case specific.
 *
 * Step 2 runs only when some [...] group holds a character that is not a '|':
 * every [...] group is rewritten as a parenthesised alternation (x|y|z). A
 * backslash and the character after it stay together as one alternative.
 *
 * Diagnostics are written with console.log; nothing is thrown for malformed
 * input.
 *
 * @param {string} str Regex containing [] groups and shorthand escapes.
 * @return {string} The expanded regex.
 */
function regexToMinDFASpec(str) {
  // TODO: Upstream this to min_dfa
  const substitutions = [
    ['A-Z', A2Z_nosep],
    ['a-z', a2z_nosep],
    ['A-F', A2F_nosep],
    ['a-f', a2f_nosep],
    ['0-9', r0to9_nosep],
    ['\\w', `${A2Z_nosep}${r0to9_nosep}${a2z_nosep}_`],
    ['\\d', r0to9_nosep],
    ['\\s', slash_s],
  ];
  const expanded = substitutions.reduce(
    (text, [needle, replacement]) => text.replaceAll(needle, replacement),
    str,
  );

  // Put a '|' in front of every token inside [...] and turn the brackets into
  // parentheses.
  function addPipeInsideBrackets(text) {
    const pieces = [];
    let inClass = false;
    let cursor = 0;
    while (cursor < text.length) {
      const ch = text[cursor];
      cursor += 1;
      if (ch === '[') {
        pieces.push(ch);
        inClass = true;
        continue;
      }
      if (ch === ']') {
        inClass = false;
      }
      let token = ch;
      if (ch === '\\') {
        // Keep the escaped character glued to its backslash.
        token += text[cursor];
        cursor += 1;
      }
      pieces.push(inClass ? `|${token}` : token);
    }
    // The first token of each group received a pipe as well; strip that one.
    return pieces.join('').replaceAll('[|', '[').replaceAll('[', '(').replaceAll(']', ')');
  }

  // Returns false as soon as a character inside [...] is not a '|'. Also logs
  // when a '|' shows up at parenthesis depth 0.
  function checkIfBracketsHavePipes(text) {
    let inClass = false;
    let parenDepth = 0;
    let cursor = 0;
    while (cursor < text.length) {
      if (text[cursor] === '[') {
        inClass = true;
        cursor += 1;
        continue;
      }
      if (text[cursor] === ']') {
        inClass = false;
      }
      if (text[cursor] === '(') {
        parenDepth += 1;
      } else if (text[cursor] === ')') {
        parenDepth -= 1;
      }
      if (inClass) {
        if (text[cursor] !== '|') {
          return false;
        }
        // Step over the pipe; the checks below look at the token after it.
        cursor += 1;
      }
      if (parenDepth === 0 && text[cursor] === '|') {
        console.log('Error: | outside of parens!');
      }
      if (text[cursor] === '\\') {
        // Skip the escaped character together with its backslash.
        cursor += 1;
      }
      cursor += 1;
    }
    return true;
  }

  if (checkIfBracketsHavePipes(expanded)) {
    return expanded;
  }

  const piped = addPipeInsideBrackets(expanded);
  if (!checkIfBracketsHavePipes(piped)) {
    console.log('Did not add brackets correctly!');
  }
  return piped;
}

/**
 * Turn a DFA state id into its numeric value.
 *
 * An id starting with a digit 1-9 is parsed as a decimal integer. Otherwise
 * the id is treated like a spreadsheet column label: 'A' is 1, 'Z' is 26,
 * 'AA' is 27, and so on. Characters that are not uppercase A-Z add nothing.
 *
 * @param {string} col State id.
 * @return {number} Value of the id; 0 when the id is empty.
 */
function toNature(col) {
  const letters = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ';
  const first = col[0];
  const isNumericId = first >= '1' && first <= '9';

  if (isNumericId) {
    return parseInt(col, 10);
  }

  let value = 0;
  for (let index = 0; index < col.length; index += 1) {
    const exponent = col.length - 1 - index;
    const digit = letters.indexOf(col[index]) + 1;
    value += Math.pow(letters.length, exponent) * digit;
  }
  return value;
}

/**
 * Build the minimised DFA for `regex` and serialise its transition table.
 *
 * The regex goes through regexToNfa -> nfaToDfa -> minDfa. Every state
 * reachable from the returned start state is visited and given a 1-based
 * `nature` number derived from its id by toNature(); the state with nature k
 * is stored at index k - 1 of the output array.
 *
 * Side effect: the `nature` property is written onto each visited state.
 *
 * @param {string} regex Regex text in the form accepted by regexToNfa().
 * @return {string} JSON text of an array whose entries are `{ type, edges }`,
 *   where `edges` maps each transition symbol to the zero-based index of the
 *   target state.
 */
function regexToGraph(regex) {
  const nfa = regexToNfa(regex);
  const dfa = minDfa(nfaToDfa(nfa));

  // Sets keep the "seen before?" checks constant-time. Rebuilding
  // Object.keys(...) on every lookup made the walk quadratic in the number of
  // states.
  const seenIds = new Set();
  const symbolSet = new Set();
  const nodes = [];
  const stack = [dfa];

  while (stack.length > 0) {
    const top = stack.pop();
    if (seenIds.has(top.id)) {
      continue;
    }
    seenIds.add(top.id);
    top.nature = toNature(top.id);
    nodes.push(top);

    for (const edge of top.edges) {
      // Epsilon edges are followed but never become table columns.
      if (edge[0] !== 'ϵ') {
        symbolSet.add(edge[0]);
      }
      stack.push(edge[1]);
    }
  }

  nodes.sort((a, b) => a.nature - b.nature);
  const symbols = [...symbolSet].sort();

  const graph = [];
  for (const node of nodes) {
    const edges = {};
    for (const symbol of symbols) {
      // Direct own-property lookup instead of scanning the key list each time.
      if (Object.prototype.hasOwnProperty.call(node.trans, symbol)) {
        edges[symbol] = node.trans[symbol].nature - 1;
      }
    }
    graph[node.nature - 1] = { type: node.type, edges };
  }

  return JSON.stringify(graph);
}

if (typeof require === 'function') {
exports.regexToMinDFASpec = regexToMinDFASpec;
exports.toNature = toNature;
exports.regexToGraph = regexToGraph;
}
Loading

0 comments on commit 341da24

Please sign in to comment.