From c21b590bfc7bdac04887aa20e9d88fad007db631 Mon Sep 17 00:00:00 2001 From: javiersuweijie Date: Thu, 21 Sep 2023 15:25:43 +0800 Subject: [PATCH 1/3] chore: upstreamed changes from zkemail --- compiler/cli.js | 2 +- compiler/gen.js | 12 +- compiler/lexical.js | 992 +++++++++++++++++------------------- compiler/regex_to_dfa.js | 214 ++++++++ test/regex-compiler.test.js | 47 +- test/regex-compiler.test.ts | 218 -------- 6 files changed, 715 insertions(+), 770 deletions(-) create mode 100644 compiler/regex_to_dfa.js delete mode 100644 test/regex-compiler.test.ts diff --git a/compiler/cli.js b/compiler/cli.js index 834d0e1..ff36576 100644 --- a/compiler/cli.js +++ b/compiler/cli.js @@ -22,6 +22,6 @@ program.on('command:*', () => { program.parse(process.argv); -if (!process.args.length) { +if (!process.argv.length) { program.help(); } diff --git a/compiler/gen.js b/compiler/gen.js index a8f4ed5..9f4f888 100644 --- a/compiler/gen.js +++ b/compiler/gen.js @@ -3,18 +3,12 @@ const path = require('path'); const regexpTree = require('regexp-tree'); const assert = require('assert'); const lexical = require('./lexical'); +const regex_to_dfa = require('./regex_to_dfa'); + async function generateCircuit(regex, circuitLibPath, circuitName) { - const ast = regexpTree.parse(`/${regex}/`); - regexpTree.traverse(ast, { - '*': function ({ node }) { - if (node.type === 'CharacterClass') { - throw new Error('CharacterClass not supported'); - } - }, - }); - const graph_json = lexical.compile(regex); + const graph_json = JSON.parse(regex_to_dfa.regexToGraph(regex_to_dfa.regexToMinDFASpec(regex))); const N = graph_json.length; // Outgoing nodes diff --git a/compiler/lexical.js b/compiler/lexical.js index 6cdbf46..7e0627e 100644 --- a/compiler/lexical.js +++ b/compiler/lexical.js @@ -1,6 +1,3 @@ -/* eslint-disable no-prototype-builtins */ -/*jslint browser: true*/ - /** * Try parsing simple regular expression to syntax tree. * @@ -19,136 +16,140 @@ * @param {string} text The input regular expression * @return {string|object} Returns a string that is an error message if failed to parse the expression, * otherwise returns an object which is the syntax tree. + * + * Edited from https://github.com/CyberZHG/toolbox/blob/gh-pages/js/lexical.js */ function parseRegex(text) { - 'use strict'; - function parseSub(text, begin, end, first) { - var i, - sub, - last = 0, - node = {'begin': begin, 'end': end}, - virNode, - tempNode, - stack = 0, - parts = []; - if (text.length === 0) { - return 'Error: empty input at ' + begin + '.'; + "use strict"; + function parseSub(text, begin, end, first) { + var i, + sub, + last = 0, + node = { begin: begin, end: end }, + virNode, + tempNode, + stack = 0, + parts = []; + if (text.length === 0) { + return "Error: empty input at " + begin + "."; + } + if (first) { + for (i = 0; i <= text.length; i += 1) { + if (i === text.length || (text[i] === "|" && stack === 0)) { + if (last === 0 && i === text.length) { + return parseSub(text, begin + last, begin + i, false); + } + sub = parseSub(text.slice(last, i), begin + last, begin + i, true); + if (typeof sub === "string") { + return sub; + } + parts.push(sub); + last = i + 1; + } else if (text[i] === "(") { + stack += 1; + } else if (text[i] === ")") { + stack -= 1; } - if (first) { - for (i = 0; i <= text.length; i += 1) { - if (i === text.length || (text[i] === '|' && stack === 0)) { - if (last === 0 && i === text.length) { - return parseSub(text, begin + last, begin + i, false); - } - sub = parseSub(text.slice(last, i), begin + last, begin + i, true); - if (typeof sub === 'string') { - return sub; - } - parts.push(sub); - last = i + 1; - } else if (text[i] === '(') { - stack += 1; - } else if (text[i] === ')') { - stack -= 1; - } - } - if (parts.length === 1) { - return parts[0]; + } + if (parts.length === 1) { + return parts[0]; + } + node.type = "or"; + node.parts = parts; + } else { + for (i = 0; i < text.length; i += 1) { + if (text[i] === "(") { + last = i + 1; + i += 1; + stack = 1; + while (i < text.length && stack !== 0) { + if (text[i] === "(") { + stack += 1; + } else if (text[i] === ")") { + stack -= 1; } - node.type = 'or'; - node.parts = parts; + i += 1; + } + if (stack !== 0) { + return "Error: missing right bracket for " + (begin + last) + "."; + } + i -= 1; + sub = parseSub(text.slice(last, i), begin + last, begin + i, true); + if (typeof sub === "string") { + return sub; + } + sub.begin -= 1; + sub.end += 1; + parts.push(sub); + } else if (text[i] === "*") { + if (parts.length === 0) { + return "Error: unexpected * at " + (begin + i) + "."; + } + tempNode = { begin: parts[parts.length - 1].begin, end: parts[parts.length - 1].end + 1 }; + tempNode.type = "star"; + tempNode.sub = parts[parts.length - 1]; + parts[parts.length - 1] = tempNode; + } else if (text[i] === "+") { + if (parts.length === 0) { + return "Error: unexpected + at " + (begin + i) + "."; + } + virNode = { begin: parts[parts.length - 1].begin, end: parts[parts.length - 1].end + 1 }; + virNode.type = "star"; + virNode.sub = parts[parts.length - 1]; + tempNode = { begin: parts[parts.length - 1].begin, end: parts[parts.length - 1].end + 1 }; + tempNode.type = "cat"; + tempNode.parts = [parts[parts.length - 1], virNode]; + parts[parts.length - 1] = tempNode; + } else if (text[i] === "?") { + if (parts.length === 0) { + return "Error: unexpected + at " + (begin + i) + "."; + } + virNode = { begin: parts[parts.length - 1].begin, end: parts[parts.length - 1].end + 1 }; + virNode.type = "empty"; + virNode.sub = parts[parts.length - 1]; + tempNode = { begin: parts[parts.length - 1].begin, end: parts[parts.length - 1].end + 1 }; + tempNode.type = "or"; + tempNode.parts = [parts[parts.length - 1], virNode]; + parts[parts.length - 1] = tempNode; + } else if (text[i] === "ϵ") { + tempNode = { begin: begin + i, end: begin + i + 1 }; + tempNode.type = "empty"; + parts.push(tempNode); + } else if (Array.isArray(text[i])) { + tempNode = { begin: begin + i, end: begin + i + 1 }; + tempNode.type = "text"; + tempNode.text = text[i][0]; + parts.push(tempNode); } else { - for (i = 0; i < text.length; i += 1) { - if (text[i] === '(') { - last = i + 1; - i += 1; - stack = 1; - while (i < text.length && stack !== 0) { - if (text[i] === '(') { - stack += 1; - } else if (text[i] === ')') { - stack -= 1; - } - i += 1; - } - if (stack !== 0) { - return 'Error: missing right bracket for ' + (begin + last) + '.'; - } - i -= 1; - sub = parseSub(text.slice(last, i), begin + last, begin + i, true); - if (typeof sub === 'string') { - return sub; - } - sub.begin -= 1; - sub.end += 1; - parts.push(sub); - } else if (text[i] === '*') { - if (parts.length === 0) { - return 'Error: unexpected * at ' + (begin + i) + '.'; - } - tempNode = {'begin': parts[parts.length - 1].begin, 'end': parts[parts.length - 1].end + 1}; - tempNode.type = 'star'; - tempNode.sub = parts[parts.length - 1]; - parts[parts.length - 1] = tempNode; - } else if (text[i] === '+') { - if (parts.length === 0) { - return 'Error: unexpected + at ' + (begin + i) + '.'; - } - virNode = {'begin': parts[parts.length - 1].begin, 'end': parts[parts.length - 1].end + 1}; - virNode.type = 'star'; - virNode.sub = parts[parts.length - 1]; - tempNode = {'begin': parts[parts.length - 1].begin, 'end': parts[parts.length - 1].end + 1}; - tempNode.type = 'cat'; - tempNode.parts = [parts[parts.length - 1], virNode]; - parts[parts.length - 1] = tempNode; - } else if (text[i] === '?') { - if (parts.length === 0) { - return 'Error: unexpected + at ' + (begin + i) + '.'; - } - virNode = {'begin': parts[parts.length - 1].begin, 'end': parts[parts.length - 1].end + 1}; - virNode.type = 'empty'; - virNode.sub = parts[parts.length - 1]; - tempNode = {'begin': parts[parts.length - 1].begin, 'end': parts[parts.length - 1].end + 1}; - tempNode.type = 'or'; - tempNode.parts = [parts[parts.length - 1], virNode]; - parts[parts.length - 1] = tempNode; - } else if (text[i] === 'ϵ') { - tempNode = {'begin': begin + i, 'end': begin + i + 1}; - tempNode.type = 'empty'; - parts.push(tempNode); - } else if (Array.isArray(text[i])) { - tempNode = {'begin': begin + i, 'end': begin + i + 1}; - tempNode.type = 'text'; - tempNode.text = text[i][0]; - parts.push(tempNode); - } else { - tempNode = {'begin': begin + i, 'end': begin + i + 1}; - tempNode.type = 'text'; - tempNode.text = text[i]; - parts.push(tempNode); - } - } - if (parts.length === 1) { - return parts[0]; - } - node.type = 'cat'; - node.parts = parts; + tempNode = { begin: begin + i, end: begin + i + 1 }; + tempNode.type = "text"; + tempNode.text = text[i]; + parts.push(tempNode); } - return node; + } + if (parts.length === 1) { + return parts[0]; + } + node.type = "cat"; + node.parts = parts; } + return node; + } - let new_text = []; - let i = 0; - while (i < text.length) { - if (text[i] == '\\') { - new_text.push([text[i+1]]); - i += 2; - } else { - new_text.push(text[i]); - i += 1; - } + let new_text = []; + let i = 0; + while (i < text.length) { + if (text[i] === "\\") { + const escapeMap = { n: "\n", r: "\r", t: "\t", v: "\v", f: "\f", "^": String.fromCharCode(128) }; + const char = text[i + 1]; + new_text.push([escapeMap[char] || char]); + i += 2; + } else { + new_text.push(text[i]); + i += 1; } - return parseSub(new_text, 0, new_text.length, true); + } + return parseSub(new_text, 0, new_text.length, true); } /** @@ -158,59 +159,65 @@ function parseRegex(text) { * @return {object|string} */ function regexToNfa(text) { - 'use strict'; - function generateGraph(node, start, end, count) { - var i, last, temp, tempStart, tempEnd; - if (!start.hasOwnProperty('id')) { - start.id = count; - count += 1; - } - switch (node.type) { - case 'empty': - start.edges.push(['ϵ', end]); - break; - case 'text': - start.edges.push([node.text, end]); - break; - case 'cat': - last = start; - for (i = 0; i < node.parts.length - 1; i += 1) { - temp = {'type': '', 'edges': []}; - count = generateGraph(node.parts[i], last, temp, count); - last = temp; - } - count = generateGraph(node.parts[node.parts.length - 1], last, end, count); - break; - case 'or': - for (i = 0; i < node.parts.length; i += 1) { - tempStart = {'type': '', 'edges': []}; - tempEnd = {'type': '', 'edges': [['ϵ', end]]}; - start.edges.push(['ϵ', tempStart]); - count = generateGraph(node.parts[i], tempStart, tempEnd, count); - } - break; - case 'star': - tempStart = {'type': '', 'edges': []}; - tempEnd = {'type': '', 'edges': [['ϵ', tempStart], ['ϵ', end]]}; - start.edges.push(['ϵ', tempStart]); - start.edges.push(['ϵ', end]); - count = generateGraph(node.sub, tempStart, tempEnd, count); - break; + "use strict"; + function generateGraph(node, start, end, count) { + var i, last, temp, tempStart, tempEnd; + if (!start.hasOwnProperty("id")) { + start.id = count; + count += 1; + } + switch (node.type) { + case "empty": + start.edges.push(["ϵ", end]); + break; + case "text": + start.edges.push([node.text, end]); + break; + case "cat": + last = start; + for (i = 0; i < node.parts.length - 1; i += 1) { + temp = { type: "", edges: [] }; + count = generateGraph(node.parts[i], last, temp, count); + last = temp; } - if (!end.hasOwnProperty('id')) { - end.id = count; - count += 1; + count = generateGraph(node.parts[node.parts.length - 1], last, end, count); + break; + case "or": + for (i = 0; i < node.parts.length; i += 1) { + tempStart = { type: "", edges: [] }; + tempEnd = { type: "", edges: [["ϵ", end]] }; + start.edges.push(["ϵ", tempStart]); + count = generateGraph(node.parts[i], tempStart, tempEnd, count); } - return count; + break; + case "star": + tempStart = { type: "", edges: [] }; + tempEnd = { + type: "", + edges: [ + ["ϵ", tempStart], + ["ϵ", end], + ], + }; + start.edges.push(["ϵ", tempStart]); + start.edges.push(["ϵ", end]); + count = generateGraph(node.sub, tempStart, tempEnd, count); + break; } - var ast = parseRegex(text), - start = {'type': 'start', 'edges': []}, - accept = {'type': 'accept', 'edges': []}; - if (typeof ast === 'string') { - return ast; + if (!end.hasOwnProperty("id")) { + end.id = count; + count += 1; } - generateGraph(ast, start, accept, 0); - return start; + return count; + } + var ast = parseRegex(text), + start = { type: "start", edges: [] }, + accept = { type: "accept", edges: [] }; + if (typeof ast === "string") { + return ast; + } + generateGraph(ast, start, accept, 0); + return start; } /** @@ -220,108 +227,115 @@ function regexToNfa(text) { * @return {object} dfa Returns the first element of the DFA. */ function nfaToDfa(nfa) { - 'use strict'; - function getClosure(nodes) { - var i, - closure = [], - stack = [], - symbols = [], - type = '', - top; - for (i = 0; i < nodes.length; i += 1) { - stack.push(nodes[i]); - closure.push(nodes[i]); - if (nodes[i].type === 'accept') { - type = 'accept'; - } - } - while (stack.length > 0) { - top = stack.pop(); - for (i = 0; i < top.edges.length; i += 1) { - if (top.edges[i][0] === 'ϵ') { - if (closure.indexOf(top.edges[i][1]) < 0) { - stack.push(top.edges[i][1]); - closure.push(top.edges[i][1]); - if (top.edges[i][1].type === 'accept') { - type = 'accept'; - } - } - } else { - if (symbols.indexOf(top.edges[i][0]) < 0) { - symbols.push(top.edges[i][0]); - } - } - } - } - closure.sort(function (a, b) { - return a.id - b.id; - }); - symbols.sort(); - return { - 'key': closure.map(function (x) { - return x.id; - }).join(','), - 'items': closure, - 'symbols': symbols, - 'type': type, - 'edges': [], - 'trans': {} - }; + "use strict"; + function getClosure(nodes) { + var i, + closure = [], + stack = [], + symbols = [], + type = "", + top; + for (i = 0; i < nodes.length; i += 1) { + stack.push(nodes[i]); + closure.push(nodes[i]); + if (nodes[i].type === "accept") { + type = "accept"; + } } - function getClosedMove(closure, symbol) { - var i, - j, - node, - nexts = []; - for (i = 0; i < closure.items.length; i += 1) { - node = closure.items[i]; - for (j = 0; j < node.edges.length; j += 1) { - if (symbol === node.edges[j][0]) { - if (nexts.indexOf(node.edges[j][1]) < 0) { - nexts.push(node.edges[j][1]); - } - } + while (stack.length > 0) { + top = stack.pop(); + // If top is of type string and starts with "Error" then return error + if (typeof top === "string" && top[0] === "E") { + console.log(top); + continue; + } + for (i = 0; i < top.edges.length; i += 1) { + if (top.edges[i][0] === "ϵ") { + if (closure.indexOf(top.edges[i][1]) < 0) { + stack.push(top.edges[i][1]); + closure.push(top.edges[i][1]); + if (top.edges[i][1].type === "accept") { + type = "accept"; } + } + } else { + if (symbols.indexOf(top.edges[i][0]) < 0) { + symbols.push(top.edges[i][0]); + } } - return getClosure(nexts); - } - function toAlphaCount(n) { - var a = 'A'.charCodeAt(0), - z = 'Z'.charCodeAt(0), - len = z - a + 1, - s = ''; - while (n >= 0) { - s = String.fromCharCode(n % len + a) + s; - n = Math.floor(n / len) - 1; - } - return s; + } } + closure.sort(function (a, b) { + return a.id - b.id; + }); + symbols.sort(); + return { + key: closure + .map(function (x) { + return x.id; + }) + .join(","), + items: closure, + symbols: symbols, + type: type, + edges: [], + trans: {}, + }; + } + function getClosedMove(closure, symbol) { var i, - first = getClosure([nfa]), - states = {}, - front = 0, - top, - closure, - queue = [first], - count = 0; - first.id = toAlphaCount(count); - states[first.key] = first; - while (front < queue.length) { - top = queue[front]; - front += 1; - for (i = 0; i < top.symbols.length; i += 1) { - closure = getClosedMove(top, top.symbols[i]); - if (!states.hasOwnProperty(closure.key)) { - count += 1; - closure.id = toAlphaCount(count); - states[closure.key] = closure; - queue.push(closure); - } - top.trans[top.symbols[i]] = states[closure.key]; - top.edges.push([top.symbols[i], states[closure.key]]); + j, + node, + nexts = []; + for (i = 0; i < closure.items.length; i += 1) { + node = closure.items[i]; + for (j = 0; j < node.edges.length; j += 1) { + if (symbol === node.edges[j][0]) { + if (nexts.indexOf(node.edges[j][1]) < 0) { + nexts.push(node.edges[j][1]); + } } + } } - return first; + return getClosure(nexts); + } + function toAlphaCount(n) { + var a = "A".charCodeAt(0), + z = "Z".charCodeAt(0), + len = z - a + 1, + s = ""; + while (n >= 0) { + s = String.fromCharCode((n % len) + a) + s; + n = Math.floor(n / len) - 1; + } + return s; + } + var i, + first = getClosure([nfa]), + states = {}, + front = 0, + top, + closure, + queue = [first], + count = 0; + first.id = toAlphaCount(count); + states[first.key] = first; + while (front < queue.length) { + top = queue[front]; + front += 1; + for (i = 0; i < top.symbols.length; i += 1) { + closure = getClosedMove(top, top.symbols[i]); + if (!states.hasOwnProperty(closure.key)) { + count += 1; + closure.id = toAlphaCount(count); + states[closure.key] = closure; + queue.push(closure); + } + top.trans[top.symbols[i]] = states[closure.key]; + top.edges.push([top.symbols[i], states[closure.key]]); + } + } + return first; } /** @@ -331,271 +345,211 @@ function nfaToDfa(nfa) { * @return {object} dfa Returns the first element of the minimum DFA. */ function minDfa(dfa) { - 'use strict'; - function getReverseEdges(start) { - var i, top, symbol, next, - front = 0, - queue = [start], - visited = {}, - symbols = {}, // The input alphabet - idMap = {}, // Map id to states - revEdges = {}; // Map id to the ids which connects to the id with an alphabet - visited[start.id] = true; - while (front < queue.length) { - top = queue[front]; - front += 1; - idMap[top.id] = top; - for (i = 0; i < top.symbols.length; i += 1) { - symbol = top.symbols[i]; - if (!symbols.hasOwnProperty(symbol)) { - symbols[symbol] = true; - } - next = top.trans[symbol]; - if (!revEdges.hasOwnProperty(next.id)) { - revEdges[next.id] = {}; - } - if (!revEdges[next.id].hasOwnProperty(symbol)) { - revEdges[next.id][symbol] = []; - } - revEdges[next.id][symbol].push(top.id); - if (!visited.hasOwnProperty(next.id)) { - visited[next.id] = true; - queue.push(next); - } - } + "use strict"; + function getReverseEdges(start) { + var i, + top, + symbol, + next, + front = 0, + queue = [start], + visited = {}, + symbols = {}, // The input alphabet + idMap = {}, // Map id to states + revEdges = {}; // Map id to the ids which connects to the id with an alphabet + visited[start.id] = true; + while (front < queue.length) { + top = queue[front]; + front += 1; + idMap[top.id] = top; + for (i = 0; i < top.symbols.length; i += 1) { + symbol = top.symbols[i]; + if (!symbols.hasOwnProperty(symbol)) { + symbols[symbol] = true; } - return [Object.keys(symbols), idMap, revEdges]; - } - function hopcroft(symbols, idMap, revEdges) { - var i, j, k, keys, key, key1, key2, top, group1, group2, symbol, revGroup, - ids = Object.keys(idMap).sort(), - partitions = {}, - front = 0, - queue = [], - visited = {}; - group1 = []; - group2 = []; - for (i = 0; i < ids.length; i += 1) { - if (idMap[ids[i]].type === 'accept') { - group1.push(ids[i]); - } else { - group2.push(ids[i]); - } + next = top.trans[symbol]; + if (!revEdges.hasOwnProperty(next.id)) { + revEdges[next.id] = {}; } - key = group1.join(','); - partitions[key] = group1; - queue.push(key); - visited[key] = 0; - if (group2.length !== 0) { - key = group2.join(','); - partitions[key] = group2; - queue.push(key); + if (!revEdges[next.id].hasOwnProperty(symbol)) { + revEdges[next.id][symbol] = []; } - while (front < queue.length) { - top = queue[front]; - front += 1; - if (top) { - top = top.split(','); - for (i = 0; i < symbols.length; i += 1) { - symbol = symbols[i]; - revGroup = {}; - for (j = 0; j < top.length; j += 1) { - if (revEdges.hasOwnProperty(top[j]) && revEdges[top[j]].hasOwnProperty(symbol)) { - for (k = 0; k < revEdges[top[j]][symbol].length; k += 1) { - revGroup[revEdges[top[j]][symbol][k]] = true; - } - } - } - keys = Object.keys(partitions); - for (j = 0; j < keys.length; j += 1) { - key = keys[j]; - group1 = []; - group2 = []; - for (k = 0; k < partitions[key].length; k += 1) { - if (revGroup.hasOwnProperty(partitions[key][k])) { - group1.push(partitions[key][k]); - } else { - group2.push(partitions[key][k]); - } - } - if (group1.length !== 0 && group2.length !== 0) { - delete partitions[key]; - key1 = group1.join(','); - key2 = group2.join(','); - partitions[key1] = group1; - partitions[key2] = group2; - if (visited.hasOwnProperty(key1)) { - queue[visited[key1]] = null; - visited[key1] = queue.length; - queue.push(key1); - visited[key2] = queue.length; - queue.push(key2); - } else if (group1.length <= group2.length) { - visited[key1] = queue.length; - queue.push(key1); - } else { - visited[key2] = queue.length; - queue.push(key2); - } - } - } - } - } + revEdges[next.id][symbol].push(top.id); + if (!visited.hasOwnProperty(next.id)) { + visited[next.id] = true; + queue.push(next); } - return Object.values(partitions); + } + } + return [Object.keys(symbols), idMap, revEdges]; + } + function hopcroft(symbols, idMap, revEdges) { + var i, + j, + k, + keys, + key, + key1, + key2, + top, + group1, + group2, + symbol, + revGroup, + ids = Object.keys(idMap).sort(), + partitions = {}, + front = 0, + queue = [], + visited = {}; + group1 = []; + group2 = []; + for (i = 0; i < ids.length; i += 1) { + if (idMap[ids[i]].type === "accept") { + group1.push(ids[i]); + } else { + group2.push(ids[i]); + } + } + key = group1.join(","); + partitions[key] = group1; + queue.push(key); + visited[key] = 0; + if (group2.length !== 0) { + key = group2.join(","); + partitions[key] = group2; + queue.push(key); } - function buildMinNfa(start, partitions, idMap, revEdges) { - var i, j, temp, node, symbol, - nodes = [], - group = {}, - edges = {}; - partitions.sort(function (a, b) { - var ka = a.join(','), kb = b.join(','); - if (ka < kb) { - return -1; + while (front < queue.length) { + top = queue[front]; + front += 1; + if (top) { + top = top.split(","); + for (i = 0; i < symbols.length; i += 1) { + symbol = symbols[i]; + revGroup = {}; + for (j = 0; j < top.length; j += 1) { + if (revEdges.hasOwnProperty(top[j]) && revEdges[top[j]].hasOwnProperty(symbol)) { + for (k = 0; k < revEdges[top[j]][symbol].length; k += 1) { + revGroup[revEdges[top[j]][symbol][k]] = true; + } } - if (ka > kb) { - return 1; + } + keys = Object.keys(partitions); + for (j = 0; j < keys.length; j += 1) { + key = keys[j]; + group1 = []; + group2 = []; + for (k = 0; k < partitions[key].length; k += 1) { + if (revGroup.hasOwnProperty(partitions[key][k])) { + group1.push(partitions[key][k]); + } else { + group2.push(partitions[key][k]); + } } - return 0; - }); - for (i = 0; i < partitions.length; i += 1) { - if (partitions[i].indexOf(start.id) >= 0) { - if (i > 0) { - temp = partitions[i]; - partitions[i] = partitions[0]; - partitions[0] = temp; - } - break; + if (group1.length !== 0 && group2.length !== 0) { + delete partitions[key]; + key1 = group1.join(","); + key2 = group2.join(","); + partitions[key1] = group1; + partitions[key2] = group2; + if (visited.hasOwnProperty(key1)) { + queue[visited[key1]] = null; + visited[key1] = queue.length; + queue.push(key1); + visited[key2] = queue.length; + queue.push(key2); + } else if (group1.length <= group2.length) { + visited[key1] = queue.length; + queue.push(key1); + } else { + visited[key2] = queue.length; + queue.push(key2); + } } + } } - for (i = 0; i < partitions.length; i += 1) { - node = { - id: (i + 1).toString(), - key: partitions[i].join(','), - items: [], - symbols: [], - type: idMap[partitions[i][0]].type, - edges: [], - trans: {}, - }; - for (j = 0; j < partitions[i].length; j += 1) { - node.items.push(idMap[partitions[i][j]]); - group[partitions[i][j]] = i; - } - edges[i] = {}; - nodes.push(node); - } - Object.keys(revEdges).forEach(function (to) { - Object.keys(revEdges[to]).forEach(function (symbol) { - revEdges[to][symbol].forEach(function (from) { - if (!edges[group[from]].hasOwnProperty(group[to])) { - edges[group[from]][group[to]] = {}; - } - edges[group[from]][group[to]][symbol] = true; - }); - }); - }); - Object.keys(edges).forEach(function (from) { - Object.keys(edges[from]).forEach(function (to) { - symbol = JSON.stringify(Object.keys(edges[from][to]).sort()); - nodes[from].symbols.push(symbol); - nodes[from].edges.push([symbol, nodes[to]]); - nodes[from].trans[symbol] = nodes[to]; - }); - }); - return nodes[0]; + } } - var edgesTuple = getReverseEdges(dfa), - symbols = edgesTuple[0], - idMap = edgesTuple[1], - revEdges = edgesTuple[2], - partitions = hopcroft(symbols, idMap, revEdges); - return buildMinNfa(dfa, partitions, idMap, revEdges); -} - -function toNature(col) { + return Object.values(partitions); + } + function buildMinNfa(start, partitions, idMap, revEdges) { var i, - j, - base = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', - result = 0; - if ('1' <= col[0] && col[0] <= '9') { - result = parseInt(col, 10); - } else { - for (i = 0, j = col.length - 1; i < col.length; i += 1, j -= 1) { - result += Math.pow(base.length, j) * (base.indexOf(col[i]) + 1); + j, + temp, + node, + symbol, + nodes = [], + group = {}, + edges = {}; + partitions.sort(function (a, b) { + var ka = a.join(","), + kb = b.join(","); + if (ka < kb) { + return -1; + } + if (ka > kb) { + return 1; + } + return 0; + }); + for (i = 0; i < partitions.length; i += 1) { + if (partitions[i].indexOf(start.id) >= 0) { + if (i > 0) { + temp = partitions[i]; + partitions[i] = partitions[0]; + partitions[0] = temp; } + break; + } } - return result; -} - -// '(\r\n|\x80)(to|from):([A-Za-z0-9 _."@-]+<)?[a-zA-Z0-9_.-]+@[a-zA-Z0-9_.]+>?\r\n'; -// let regex = '(\r\n|\x80)(to|from):((a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z|A|B|C|D|E|F|G|H|I|J|K|L|M|N|O|P|Q|R|S|T|U|V|W|X|Y|Z|0|1|2|3|4|5|6|7|8|9| |_|.|"|@|-)+<)?(a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z|A|B|C|D|E|F|G|H|I|J|K|L|M|N|O|P|Q|R|S|T|U|V|W|X|Y|Z|0|1|2|3|4|5|6|7|8|9|_|.|-)+@(a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z|A|B|C|D|E|F|G|H|I|J|K|L|M|N|O|P|Q|R|S|T|U|V|W|X|Y|Z|0|1|2|3|4|5|6|7|8|9|_|.|-)+>?\r\n'; - -const key_chars = '(a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z)'; -const catch_all = '(0|1|2|3|4|5|6|7|8|9|a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z|A|B|C|D|E|F|G|H|I|J|K|L|M|N|O|P|Q|R|S|T|U|V|W|X|Y|Z|!|"|#|$|%|&|\'|\\(|\\)|\\*|\\+|,|-|.|/|:|;|<|=|>|\\?|@|[|\\\\|]|^|_|`|{|\\||}|~| |\t|\n|\r|\x0b|\x0c)'; -const catch_all_without_semicolon = '(0|1|2|3|4|5|6|7|8|9|a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z|A|B|C|D|E|F|G|H|I|J|K|L|M|N|O|P|Q|R|S|T|U|V|W|X|Y|Z|!|"|#|$|%|&|\'|\\(|\\)|\\*|\\+|,|-|.|/|:|<|=|>|\\?|@|[|\\\\|]|^|_|`|{|\\||}|~| |\t|\n|\r|\x0b|\x0c)'; -const base_64 = '(a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z|A|B|C|D|E|F|G|H|I|J|K|L|M|N|O|P|Q|R|S|T|U|V|W|X|Y|Z|0|1|2|3|4|5|6|7|8|9|\\+|/|=)'; -const word_char = '(a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z|A|B|C|D|E|F|G|H|I|J|K|L|M|N|O|P|Q|R|S|T|U|V|W|X|Y|Z|0|1|2|3|4|5|6|7|8|9|_)'; - - -function compile(regex) { - // let regex = `\r\ndkim-signature:(${key_chars}=${catch_all_without_semicolon}+; )+bh=${base_64}+; `; - - // console.log(regex); - // console.log(Buffer.from(regex).toString('base64')); - - // let regex = 'hello(0|1|2|3|4|5|6|7|8|9)+world'; - let nfa = regexToNfa(regex); - let dfa = minDfa(nfaToDfa(nfa)); - - var i, - states = {}, - nodes = [], - stack = [dfa], - symbols = [], - top; - - while (stack.length > 0) { - top = stack.pop(); - if (!states.hasOwnProperty(top.id)) { - states[top.id] = top; - top.nature = toNature(top.id); - nodes.push(top); - for (i = 0; i < top.edges.length; i += 1) { - if (top.edges[i][0] !== 'ϵ' && symbols.indexOf(top.edges[i][0]) < 0) { - symbols.push(top.edges[i][0]); - } - stack.push(top.edges[i][1]); - } - } + for (i = 0; i < partitions.length; i += 1) { + node = { + id: (i + 1).toString(), + key: partitions[i].join(","), + items: [], + symbols: [], + type: idMap[partitions[i][0]].type, + edges: [], + trans: {}, + }; + for (j = 0; j < partitions[i].length; j += 1) { + node.items.push(idMap[partitions[i][j]]); + group[partitions[i][j]] = i; + } + edges[i] = {}; + nodes.push(node); } - nodes.sort(function (a, b) { - return a.nature - b.nature; + Object.keys(revEdges).forEach(function (to) { + Object.keys(revEdges[to]).forEach(function (symbol) { + revEdges[to][symbol].forEach(function (from) { + if (!edges[group[from]].hasOwnProperty(group[to])) { + edges[group[from]][group[to]] = {}; + } + edges[group[from]][group[to]][symbol] = true; + }); + }); }); - symbols.sort(); - - let graph = []; - for (let i = 0; i < nodes.length; i += 1) { - let curr = {}; - curr.type = nodes[i].type; - curr.edges = {}; - for (let j = 0; j < symbols.length; j += 1) { - if (nodes[i].trans.hasOwnProperty(symbols[j])) { - curr.edges[symbols[j]] = nodes[i].trans[symbols[j]].nature-1; - } - } - graph[nodes[i].nature-1] = curr; - } - - return graph; + Object.keys(edges).forEach(function (from) { + Object.keys(edges[from]).forEach(function (to) { + symbol = JSON.stringify(Object.keys(edges[from][to]).sort()); + nodes[from].symbols.push(symbol); + nodes[from].edges.push([symbol, nodes[to]]); + nodes[from].trans[symbol] = nodes[to]; + }); + }); + return nodes[0]; + } + var edgesTuple = getReverseEdges(dfa), + symbols = edgesTuple[0], + idMap = edgesTuple[1], + revEdges = edgesTuple[2], + partitions = hopcroft(symbols, idMap, revEdges); + return buildMinNfa(dfa, partitions, idMap, revEdges); } -module.exports = { - compile, - key_chars, - base_64, - word_char, - catch_all, - catch_all_without_semicolon, -}; \ No newline at end of file +if (typeof require === "function") { + exports.parseRegex = parseRegex; + exports.regexToNfa = regexToNfa; + exports.nfaToDfa = nfaToDfa; + exports.minDfa = minDfa; +} diff --git a/compiler/regex_to_dfa.js b/compiler/regex_to_dfa.js new file mode 100644 index 0000000..d7733fc --- /dev/null +++ b/compiler/regex_to_dfa.js @@ -0,0 +1,214 @@ +/*jslint browser: true*/ +/*global require, exports*/ +const { minDfa, nfaToDfa, regexToNfa } = require("./lexical"); + +/** This section defines helper regex components -- to edit the regex used, edit the return + * of the test_regex function. + * All of the relevant regexes are in the main repo README. + */ + +// Helper components + +const a2z_nosep = "abcdefghijklmnopqrstuvwxyz"; +const A2Z_nosep = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"; +const a2f_nosep = "abcdef"; +const A2F_nosep = "ABCDEF"; +const r0to9_nosep = "0123456789"; + +// TODO: Note that this is replicated code in lexical.js as well +// Note that ^ has to be manually replaced with \x80 in the regex +const escapeMap = { n: "\n", r: "\r", t: "\t", v: "\v", f: "\f" }; +let whitespace = Object.values(escapeMap); +const slash_s = whitespace.join("|"); + +// Escapes and prints regexes (might be buggy) +function format_regex_printable(s) { + const escaped_string_json = JSON.stringify(s); + const escaped_string = escaped_string_json.slice(1, escaped_string_json.length - 1); + return escaped_string + .replaceAll("\\\\\\\\", "\\") + .replaceAll("\\\\", "\\") + .replaceAll("\\|", "\\\\|") + .replaceAll("/", "\\/") + .replaceAll("\u000b", "\\♥") + .replaceAll("|[|", "|\\[|") + .replaceAll("|]|", "|\\]|") + .replaceAll("|.|", "|\\.|") + .replaceAll("|$|", "|\\$|") + .replaceAll("|^|", "|\\^|"); + // let escaped = escape_whitespace(escape_whitespace(s.replaceAll("\\\\", "ZZZZZZZ"))); + // let fixed = escaped.replaceAll("\\(", "(").replaceAll("\\)", ")").replaceAll("\\+", "+").replaceAll("\\*", "*").replaceAll("\\?", "?"); +} + +// Note that this is not complete and very case specific i.e. can only handle a-z and a-f, and not a-c. +// This function expands [] sections to convert values for https://zkregex.com/min_dfa +// The input is a regex with [] and special characters (i.e. the first line of min_dfa tool) +// The output is expanded regexes without any special characters +function regexToMinDFASpec(str) { + // Replace all A-Z with A2Z etc + // TODO: Upstream this to min_dfa + let combined_nosep = str + .replaceAll("A-Z", A2Z_nosep) + .replaceAll("a-z", a2z_nosep) + .replaceAll("A-F", A2F_nosep) + .replaceAll("a-f", a2f_nosep) + .replaceAll("0-9", r0to9_nosep) + .replaceAll("\\w", A2Z_nosep + r0to9_nosep + a2z_nosep + "_") + .replaceAll("\\d", r0to9_nosep) + .replaceAll("\\s", slash_s); + + function addPipeInsideBrackets(str) { + let result = ""; + let insideBrackets = false; + for (let i = 0; i < str.length; i++) { + if (str[i] === "[") { + result += str[i]; + insideBrackets = true; + continue; + } else if (str[i] === "]") { + insideBrackets = false; + } + let str_to_add = str[i]; + if (str[i] === "\\") { + i++; + str_to_add += str[i]; + } + result += insideBrackets ? "|" + str_to_add : str_to_add; + } + return result.replaceAll("[|", "[").replaceAll("[", "(").replaceAll("]", ")"); + } + + // function makeCurlyBracesFallback(str) { + // let result = ""; + // let insideBrackets = false; + // for (let i = 0; i < str.length; i++) { + // if (str[i] === "{") { + // result += str[i]; + // insideBrackets = true; + // continue; + // } else if (str[i] === "}") { + // insideBrackets = false; + // } + // result += insideBrackets ? "|" + str[i] : str[i]; + // } + // return result.replaceAll("[|", "[").replaceAll("[", "(").replaceAll("]", ")"); + // } + + function checkIfBracketsHavePipes(str) { + let result = true; + let insideBrackets = false; + let insideParens = 0; + let indexAt = 0; + for (let i = 0; i < str.length; i++) { + if (indexAt >= str.length) break; + if (str[indexAt] === "[") { + insideBrackets = true; + indexAt++; + continue; + } else if (str[indexAt] === "]") { + insideBrackets = false; + } + if (str[indexAt] === "(") { + insideParens++; + } else if (str[indexAt] === ")") { + insideParens--; + } + if (insideBrackets) { + if (str[indexAt] === "|") { + indexAt++; + } else { + result = false; + return result; + } + } + if (!insideParens && str[indexAt] === "|") { + console.log("Error: | outside of parens!"); + } + if (str[indexAt] === "\\") { + indexAt++; + } + indexAt++; + } + return result; + } + + let combined; + if (!checkIfBracketsHavePipes(combined_nosep)) { + // console.log("Adding pipes within brackets between everything!"); + combined = addPipeInsideBrackets(combined_nosep); + if (!checkIfBracketsHavePipes(combined)) { + console.log("Did not add brackets correctly!"); + } + } else { + combined = combined_nosep; + } + return combined; +} + +function toNature(col) { + var i, + j, + base = "ABCDEFGHIJKLMNOPQRSTUVWXYZ", + result = 0; + if ("1" <= col[0] && col[0] <= "9") { + result = parseInt(col, 10); + } else { + for (i = 0, j = col.length - 1; i < col.length; i += 1, j -= 1) { + result += Math.pow(base.length, j) * (base.indexOf(col[i]) + 1); + } + } + return result; +} + +function regexToGraph(regex) { + let nfa = regexToNfa(regex); + let dfa = minDfa(nfaToDfa(nfa)); + + var i, + j, + states = {}, + nodes = [], + stack = [dfa], + symbols = [], + top; + + while (stack.length > 0) { + top = stack.pop(); + if (!states.hasOwnProperty(top.id)) { + states[top.id] = top; + top.nature = toNature(top.id); + nodes.push(top); + for (i = 0; i < top.edges.length; i += 1) { + if (top.edges[i][0] !== "ϵ" && symbols.indexOf(top.edges[i][0]) < 0) { + symbols.push(top.edges[i][0]); + } + stack.push(top.edges[i][1]); + } + } + } + nodes.sort(function (a, b) { + return a.nature - b.nature; + }); + symbols.sort(); + + let graph = []; + for (let i = 0; i < nodes.length; i += 1) { + let curr = {}; + curr.type = nodes[i].type; + curr.edges = {}; + for (let j = 0; j < symbols.length; j += 1) { + if (nodes[i].trans.hasOwnProperty(symbols[j])) { + curr.edges[symbols[j]] = nodes[i].trans[symbols[j]].nature - 1; + } + } + graph[nodes[i].nature - 1] = curr; + } + + return JSON.stringify(graph); +} + +if (typeof require === "function") { + exports.regexToMinDFASpec = regexToMinDFASpec; + exports.toNature = toNature; + exports.regexToGraph = regexToGraph +} diff --git a/test/regex-compiler.test.js b/test/regex-compiler.test.js index 173d902..de5b964 100644 --- a/test/regex-compiler.test.js +++ b/test/regex-compiler.test.js @@ -1,5 +1,5 @@ const fs = require('fs'); -const {expect} = require('chai'); +const { expect } = require('chai'); const path = require('path'); const circom_tester = require('circom_tester'); const generator = require('../compiler/gen'); @@ -119,9 +119,26 @@ describe('regex compiler tests', function () { ], ] ], + [ + ['(\r\n|\x80)(to|from):([a-zA-Z0-9 _."@-]+<)?[a-zA-Z0-9_.-]+@[a-zA-Z0-9_.-]+>?\r\n', 2], + [ + [ + 'from to email header', + convertMsg(fs.readFileSync(path.join(__dirname, 'header.fixture.txt'), 'utf8')), + 0, + (signals) => { + expect(signals.main.entire_count).to.equal(2n); + expect(signals.main.group_match_count).to.equal(2n); + expect(signals.main.start_idx).to.equal(54n); + const expected_reveal = encodeString('verify'); + assert_reveal(signals, expected_reveal); + } + ], + ] + ], [ ['dkim-signature:((a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z)=(0|1|2|3|4|5|6|7|8|9|a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z|A|B|C|D|E|F|G|H|I|J|K|L|M|N|O|P|Q|R|S|T|U|V|W|X|Y|Z|!|"|#|$|%|&|\'|\\(|\\)|\\*|\\+|,|-|.|\\/|:|<|=|>|\\?|@|\\[|\\\\|\\]|^|_|`|{|\\||}|~| |\t|\n' + - '|\r|\x0B|\f)+; )+bh=(a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z|A|B|C|D|E|F|G|H|I|J|K|L|M|N|O|P|Q|R|S|T|U|V|W|X|Y|Z|0|1|2|3|4|5|6|7|8|9|\\+|\\/|=)+; ', 2], + '|\r|\x0B|\f)+; )+bh=(a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z|A|B|C|D|E|F|G|H|I|J|K|L|M|N|O|P|Q|R|S|T|U|V|W|X|Y|Z|0|1|2|3|4|5|6|7|8|9|\\+|\\/|=)+; ', 2], [ [ 'assert body hash', @@ -147,12 +164,12 @@ describe('regex compiler tests', function () { describe(`/${regex}/ > group idx: ${group_idx} > ${testCircomFile}`, () => { before(async function () { await generator.generateCircuit( - regex, + regex, '../circuits' ); circuit = await wasm_tester( path.join(__dirname, 'circuits', testCircomFile), - {recompile: process.env.NO_COMPILE ? false : true, output: `${__dirname}/../build/`, O: 0} + { recompile: process.env.NO_COMPILE ? false : true, output: `${__dirname}/../build/`, O: 0 } ); }); tests.forEach((test) => { @@ -160,10 +177,10 @@ describe('regex compiler tests', function () { const content = test[1]; const match_idx = test[2]; const checkSignals = test[3]; - + describe(name, () => { - it('checks witness', async function() { - let witness = await circuit.calculateWitness({msg: content, match_idx}); + it('checks witness', async function () { + let witness = await circuit.calculateWitness({ msg: content, match_idx }); const signals = await circuit.getJSONOutput('main', witness); checkSignals(signals); await circuit.checkConstraints(witness); @@ -173,22 +190,6 @@ describe('regex compiler tests', function () { }); }); - describe('exceptions', () => { - it('character class not supported', async () => { - try { - await generator.generateCircuit( - '[a-z]', - '../circuits' - ); - } - catch (e) { - expect(e.message).to.equal('CharacterClass not supported'); - return; - } - - expect.fail('should have thrown'); - }); - }); }); function encodeString(str) { diff --git a/test/regex-compiler.test.ts b/test/regex-compiler.test.ts deleted file mode 100644 index 2c3d9a1..0000000 --- a/test/regex-compiler.test.ts +++ /dev/null @@ -1,218 +0,0 @@ -const fs = require('fs'); -import { expect } from 'chai'; -const path = require('path') -const circom_tester = require('circom_tester'); -const generator = require('../compiler/gen') -const wasm_tester = circom_tester.wasm; - -describe("regex compiler tests", function () { - [ - [ - [`1=(a|b) (2=(b|c)+ )+d`, 0], - [ - [ - '1 entire match and 1st sub-group match', - convertMsg(`1=a 2=b 2=bc 2=c d`), - 0, - (signals: any) => { - expect(signals.main.entire_count).to.equal(1n) - expect(signals.main.group_match_count).to.equal(1n) - expect(signals.main.start_idx).to.equal(2n) - const expected_reveal = encodeString('a') - assert_reveal(signals, expected_reveal); - } - ], - ] - ], - [ - [`1=(a|b) (2=(b|c)+ )+d`, 1], - [ - [ - '1 entire match and 1st sub-group match', - convertMsg(`1=a 2=b 2=bc 2=c d`), - 0, - (signals: any) => { - expect(signals.main.entire_count).to.equal(1n) - expect(signals.main.group_match_count).to.equal(3n) - expect(signals.main.start_idx).to.equal(6n) - const expected_reveal = encodeString('b') - assert_reveal(signals, expected_reveal); - } - ], - [ - '1 entire match and 2nd sub-group match', - convertMsg(`1=a 2=b 2=bc 2=c d`), - 1, - (signals: any) => { - expect(signals.main.entire_count).to.equal(1n) - expect(signals.main.group_match_count).to.equal(3n) - expect(signals.main.start_idx).to.equal(10n) - const expected_reveal = encodeString('bc') - assert_reveal(signals, expected_reveal); - } - ], - [ - '1 entire match and 3rd sub-group match', - convertMsg(`1=a 2=b 2=bc 2=c d`), - 2, - (signals: any) => { - expect(signals.main.entire_count).to.equal(1n) - expect(signals.main.group_match_count).to.equal(3n) - expect(signals.main.start_idx).to.equal(15n) - const expected_reveal = encodeString('c') - assert_reveal(signals, expected_reveal); - } - ], - [ - '0 entire match and 2 group matches', - convertMsg(`1=a 2=b 2=bc 2=e d`), - 1, - (signals: any) => { - expect(signals.main.entire_count).to.equal(0n) - expect(signals.main.group_match_count).to.equal(2n) - } - ], - [ - '2 entire match and 2nd sub-group match', - convertMsg(`1=a 2=b 2=bc 2=c da 1=a 2=cb 2=c 2=b dd`), - 1, - (signals: any) => { - expect(signals.main.entire_count).to.equal(2n) - expect(signals.main.group_match_count).to.equal(6n) - expect(signals.main.start_idx).to.equal(10n) - const expected_reveal = encodeString('bc') - assert_reveal(signals, expected_reveal); - } - ], - // todo TOFIX - // [ - // '1 entire match and 1+ group matches with no trails behind the last group', - // convertMsg(`1=a 2=b 2=bc 2=c `), - // [`1=(a|b) (2=(b|c)+ )+`, 1, 1], - // (signals: any) => { - // for (let i = 0; i < signals.main.states.length; i++) { - // console.log(signals.main.states[i][8]) - // } - // expect(signals.main.entire_count).to.equal(1n) - // expect(signals.main.group_match_count).to.equal(3n) - // expect(signals.main.start_idx).to.equal(10n) - // const expected_reveal = 'bc'.split('').map((x: any) => BigInt(x.charCodeAt(0))) - // assert_reveal(signals, expected_reveal); - // } - // ], - ] - ], - [ - ['(\r\n|\x80)(to|from):((a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z|A|B|C|D|E|F|G|H|I|J|K|L|M|N|O|P|Q|R|S|T|U|V|W|X|Y|Z|0|1|2|3|4|5|6|7|8|9| |_|.|"|@|-)+<)?(a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z|A|B|C|D|E|F|G|H|I|J|K|L|M|N|O|P|Q|R|S|T|U|V|W|X|Y|Z|0|1|2|3|4|5|6|7|8|9|_|.|-)+@(a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z|A|B|C|D|E|F|G|H|I|J|K|L|M|N|O|P|Q|R|S|T|U|V|W|X|Y|Z|0|1|2|3|4|5|6|7|8|9|_|.|-)+>?\r\n', 2], - [ - [ - 'from to email header', - convertMsg(fs.readFileSync(path.join(__dirname, 'header.fixture.txt'), 'utf8')), - 0, - (signals: any) => { - expect(signals.main.entire_count).to.equal(2n) - expect(signals.main.group_match_count).to.equal(2n) - expect(signals.main.start_idx).to.equal(54n) - const expected_reveal = encodeString('verify') - assert_reveal(signals, expected_reveal); - } - ], - ] - ], - [ - ['dkim-signature:((a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z)=(0|1|2|3|4|5|6|7|8|9|a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z|A|B|C|D|E|F|G|H|I|J|K|L|M|N|O|P|Q|R|S|T|U|V|W|X|Y|Z|!|"|#|$|%|&|\'|\\(|\\)|\\*|\\+|,|-|.|\\/|:|<|=|>|\\?|@|\\[|\\\\|\\]|^|_|`|{|\\||}|~| |\t|\n' + - '|\r|\x0B|\f)+; )+bh=(a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z|A|B|C|D|E|F|G|H|I|J|K|L|M|N|O|P|Q|R|S|T|U|V|W|X|Y|Z|0|1|2|3|4|5|6|7|8|9|\\+|\\/|=)+; ', 2], - [ - [ - 'assert body hash', - convertMsg("\r\ndkim-signature:v=1; a=rsa-sha256; c=relaxed/relaxed; d=twitter.com; s=dkim-201406; t=1671865957; bh=hEMyi6n9V0N6aGtz3lEc6fQBlZRVUok/tkwpRCmrnaa=; h=Date:From:To:Subject:MIME-Version:Content-Type:Message-ID; b="), - 0, - (signals: any) => { - expect(signals.main.entire_count).to.equal(1n) - expect(signals.main.group_match_count).to.equal(1n) - const expected_reveal = encodeString('hEMyi6n9V0N6aGtz3lEc6fQBlZRVUok/tkwpRCmrnaa=') - assert_reveal(signals, expected_reveal); - } - ] - ] - ], - ] - .forEach((regexSuite: any) => { - const regex = regexSuite[0][0] - const group_idx = regexSuite[0][1] - const tests: any = regexSuite[1] - - const testCircomFile = `test_regex_compiler_group_${group_idx}.circom` - let circuit: any; - describe(`/${regex}/ > group idx: ${group_idx} > ${testCircomFile}`, () => { - before(async function () { - await generator.generateCircuit( - regex, - '../circuits' - ) - circuit = await wasm_tester( - path.join(__dirname, "circuits", testCircomFile), - {recompile: process.env.NO_COMPILE ? false : true, output: `${__dirname}/../build/`, O: 0} - ); - }); - tests.forEach((test: any) => { - //@ts-ignore - const name: string = test[0] - //@ts-ignore - const content: string = test[1] - //@ts-ignore - const match_idx: number = test[2] - //@ts-ignore - const checkSignals: Function = test[3] - - describe(name, () => { - it('checks witness', async function() { - let witness = await circuit.calculateWitness({msg: content, match_idx}); - const signals = await circuit.getJSONOutput('main', witness); - checkSignals(signals) - await circuit.checkConstraints(witness); - }); - }); - }) - }); - }) - - describe('exceptions', () => { - it('character class not supported', async () => { - try { - await generator.generateCircuit( - '[a-z]', - '../circuits' - ) - } - catch (e: any) { - expect(e.message).to.equal('CharacterClass not supported') - return - } - - expect.fail('should have thrown') - }); - }); -}); - -function encodeString(str: string) { - return str.split('').map((x: any) => BigInt(x.charCodeAt(0))); -} - -function convertMsg(msg: string, maxLen: number = 1536) { - let msgEncoded = msg.split('').map((x: any) => x.charCodeAt(0)); - while (msgEncoded.length < maxLen) { - msgEncoded.push(0); - } - msgEncoded = msgEncoded.map((x: any) => `${x}`); - return msgEncoded; -} - -function assert_reveal(signals: any, expected_reveal: bigint[]) { - for (let m in signals.main.reveal_shifted) { - const value = signals.main.reveal_shifted[m]; - if (expected_reveal[m as any]) { - expect(value).to.equal(expected_reveal[m as any]); - } - } -} From 03840ce99cb0524d2957a93cebb2bfa23f61ebb2 Mon Sep 17 00:00:00 2001 From: javiersuweijie Date: Thu, 21 Sep 2023 16:54:39 +0800 Subject: [PATCH 2/3] revert formatting changes --- compiler/lexical.js | 892 +++++++++++++++++++++----------------------- 1 file changed, 433 insertions(+), 459 deletions(-) diff --git a/compiler/lexical.js b/compiler/lexical.js index 7e0627e..0e21a1b 100644 --- a/compiler/lexical.js +++ b/compiler/lexical.js @@ -1,3 +1,6 @@ +/* eslint-disable no-prototype-builtins */ +/*jslint browser: true*/ + /** * Try parsing simple regular expression to syntax tree. * @@ -16,534 +19,505 @@ * @param {string} text The input regular expression * @return {string|object} Returns a string that is an error message if failed to parse the expression, * otherwise returns an object which is the syntax tree. - * - * Edited from https://github.com/CyberZHG/toolbox/blob/gh-pages/js/lexical.js */ function parseRegex(text) { - "use strict"; + 'use strict'; function parseSub(text, begin, end, first) { - var i, - sub, - last = 0, - node = { begin: begin, end: end }, - virNode, - tempNode, - stack = 0, - parts = []; - if (text.length === 0) { - return "Error: empty input at " + begin + "."; - } - if (first) { - for (i = 0; i <= text.length; i += 1) { - if (i === text.length || (text[i] === "|" && stack === 0)) { - if (last === 0 && i === text.length) { - return parseSub(text, begin + last, begin + i, false); - } - sub = parseSub(text.slice(last, i), begin + last, begin + i, true); - if (typeof sub === "string") { - return sub; - } - parts.push(sub); - last = i + 1; - } else if (text[i] === "(") { - stack += 1; - } else if (text[i] === ")") { - stack -= 1; - } - } - if (parts.length === 1) { - return parts[0]; + var i, + sub, + last = 0, + node = {'begin': begin, 'end': end}, + virNode, + tempNode, + stack = 0, + parts = []; + if (text.length === 0) { + return 'Error: empty input at ' + begin + '.'; } - node.type = "or"; - node.parts = parts; - } else { - for (i = 0; i < text.length; i += 1) { - if (text[i] === "(") { - last = i + 1; - i += 1; - stack = 1; - while (i < text.length && stack !== 0) { - if (text[i] === "(") { - stack += 1; - } else if (text[i] === ")") { - stack -= 1; - } - i += 1; - } - if (stack !== 0) { - return "Error: missing right bracket for " + (begin + last) + "."; - } - i -= 1; - sub = parseSub(text.slice(last, i), begin + last, begin + i, true); - if (typeof sub === "string") { - return sub; + if (first) { + for (i = 0; i <= text.length; i += 1) { + if (i === text.length || (text[i] === '|' && stack === 0)) { + if (last === 0 && i === text.length) { + return parseSub(text, begin + last, begin + i, false); + } + sub = parseSub(text.slice(last, i), begin + last, begin + i, true); + if (typeof sub === 'string') { + return sub; + } + parts.push(sub); + last = i + 1; + } else if (text[i] === '(') { + stack += 1; + } else if (text[i] === ')') { + stack -= 1; + } } - sub.begin -= 1; - sub.end += 1; - parts.push(sub); - } else if (text[i] === "*") { - if (parts.length === 0) { - return "Error: unexpected * at " + (begin + i) + "."; + if (parts.length === 1) { + return parts[0]; } - tempNode = { begin: parts[parts.length - 1].begin, end: parts[parts.length - 1].end + 1 }; - tempNode.type = "star"; - tempNode.sub = parts[parts.length - 1]; - parts[parts.length - 1] = tempNode; - } else if (text[i] === "+") { - if (parts.length === 0) { - return "Error: unexpected + at " + (begin + i) + "."; + node.type = 'or'; + node.parts = parts; + } else { + for (i = 0; i < text.length; i += 1) { + if (text[i] === '(') { + last = i + 1; + i += 1; + stack = 1; + while (i < text.length && stack !== 0) { + if (text[i] === '(') { + stack += 1; + } else if (text[i] === ')') { + stack -= 1; + } + i += 1; + } + if (stack !== 0) { + return 'Error: missing right bracket for ' + (begin + last) + '.'; + } + i -= 1; + sub = parseSub(text.slice(last, i), begin + last, begin + i, true); + if (typeof sub === 'string') { + return sub; + } + sub.begin -= 1; + sub.end += 1; + parts.push(sub); + } else if (text[i] === '*') { + if (parts.length === 0) { + return 'Error: unexpected * at ' + (begin + i) + '.'; + } + tempNode = {'begin': parts[parts.length - 1].begin, 'end': parts[parts.length - 1].end + 1}; + tempNode.type = 'star'; + tempNode.sub = parts[parts.length - 1]; + parts[parts.length - 1] = tempNode; + } else if (text[i] === '+') { + if (parts.length === 0) { + return 'Error: unexpected + at ' + (begin + i) + '.'; + } + virNode = {'begin': parts[parts.length - 1].begin, 'end': parts[parts.length - 1].end + 1}; + virNode.type = 'star'; + virNode.sub = parts[parts.length - 1]; + tempNode = {'begin': parts[parts.length - 1].begin, 'end': parts[parts.length - 1].end + 1}; + tempNode.type = 'cat'; + tempNode.parts = [parts[parts.length - 1], virNode]; + parts[parts.length - 1] = tempNode; + } else if (text[i] === '?') { + if (parts.length === 0) { + return 'Error: unexpected + at ' + (begin + i) + '.'; + } + virNode = {'begin': parts[parts.length - 1].begin, 'end': parts[parts.length - 1].end + 1}; + virNode.type = 'empty'; + virNode.sub = parts[parts.length - 1]; + tempNode = {'begin': parts[parts.length - 1].begin, 'end': parts[parts.length - 1].end + 1}; + tempNode.type = 'or'; + tempNode.parts = [parts[parts.length - 1], virNode]; + parts[parts.length - 1] = tempNode; + } else if (text[i] === 'ϵ') { + tempNode = {'begin': begin + i, 'end': begin + i + 1}; + tempNode.type = 'empty'; + parts.push(tempNode); + } else if (Array.isArray(text[i])) { + tempNode = {'begin': begin + i, 'end': begin + i + 1}; + tempNode.type = 'text'; + tempNode.text = text[i][0]; + parts.push(tempNode); + } else { + tempNode = {'begin': begin + i, 'end': begin + i + 1}; + tempNode.type = 'text'; + tempNode.text = text[i]; + parts.push(tempNode); + } } - virNode = { begin: parts[parts.length - 1].begin, end: parts[parts.length - 1].end + 1 }; - virNode.type = "star"; - virNode.sub = parts[parts.length - 1]; - tempNode = { begin: parts[parts.length - 1].begin, end: parts[parts.length - 1].end + 1 }; - tempNode.type = "cat"; - tempNode.parts = [parts[parts.length - 1], virNode]; - parts[parts.length - 1] = tempNode; - } else if (text[i] === "?") { - if (parts.length === 0) { - return "Error: unexpected + at " + (begin + i) + "."; + if (parts.length === 1) { + return parts[0]; } - virNode = { begin: parts[parts.length - 1].begin, end: parts[parts.length - 1].end + 1 }; - virNode.type = "empty"; - virNode.sub = parts[parts.length - 1]; - tempNode = { begin: parts[parts.length - 1].begin, end: parts[parts.length - 1].end + 1 }; - tempNode.type = "or"; - tempNode.parts = [parts[parts.length - 1], virNode]; - parts[parts.length - 1] = tempNode; - } else if (text[i] === "ϵ") { - tempNode = { begin: begin + i, end: begin + i + 1 }; - tempNode.type = "empty"; - parts.push(tempNode); - } else if (Array.isArray(text[i])) { - tempNode = { begin: begin + i, end: begin + i + 1 }; - tempNode.type = "text"; - tempNode.text = text[i][0]; - parts.push(tempNode); - } else { - tempNode = { begin: begin + i, end: begin + i + 1 }; - tempNode.type = "text"; - tempNode.text = text[i]; - parts.push(tempNode); - } - } - if (parts.length === 1) { - return parts[0]; + node.type = 'cat'; + node.parts = parts; } - node.type = "cat"; - node.parts = parts; - } - return node; + return node; } let new_text = []; let i = 0; while (i < text.length) { - if (text[i] === "\\") { - const escapeMap = { n: "\n", r: "\r", t: "\t", v: "\v", f: "\f", "^": String.fromCharCode(128) }; - const char = text[i + 1]; - new_text.push([escapeMap[char] || char]); - i += 2; - } else { - new_text.push(text[i]); - i += 1; - } + if (text[i] == '\\') { + const escapeMap = { n: '\n', r: '\r', t: '\t', v: '\v', f: '\f', '^': String.fromCharCode(128) }; + const char = text[i + 1]; + new_text.push([escapeMap[char] || char]); + i += 2; + } else { + new_text.push(text[i]); + i += 1; + } } return parseSub(new_text, 0, new_text.length, true); } /** - * Convert regular expression to nondeterministic finite automaton. - * - * @param {string} text @see parseRegex() - * @return {object|string} - */ +* Convert regular expression to nondeterministic finite automaton. +* +* @param {string} text @see parseRegex() +* @return {object|string} +*/ function regexToNfa(text) { - "use strict"; + 'use strict'; function generateGraph(node, start, end, count) { - var i, last, temp, tempStart, tempEnd; - if (!start.hasOwnProperty("id")) { - start.id = count; - count += 1; - } - switch (node.type) { - case "empty": - start.edges.push(["ϵ", end]); - break; - case "text": - start.edges.push([node.text, end]); - break; - case "cat": - last = start; - for (i = 0; i < node.parts.length - 1; i += 1) { - temp = { type: "", edges: [] }; - count = generateGraph(node.parts[i], last, temp, count); - last = temp; - } - count = generateGraph(node.parts[node.parts.length - 1], last, end, count); - break; - case "or": - for (i = 0; i < node.parts.length; i += 1) { - tempStart = { type: "", edges: [] }; - tempEnd = { type: "", edges: [["ϵ", end]] }; - start.edges.push(["ϵ", tempStart]); - count = generateGraph(node.parts[i], tempStart, tempEnd, count); - } - break; - case "star": - tempStart = { type: "", edges: [] }; - tempEnd = { - type: "", - edges: [ - ["ϵ", tempStart], - ["ϵ", end], - ], - }; - start.edges.push(["ϵ", tempStart]); - start.edges.push(["ϵ", end]); - count = generateGraph(node.sub, tempStart, tempEnd, count); - break; - } - if (!end.hasOwnProperty("id")) { - end.id = count; - count += 1; - } - return count; + var i, last, temp, tempStart, tempEnd; + if (!start.hasOwnProperty('id')) { + start.id = count; + count += 1; + } + switch (node.type) { + case 'empty': + start.edges.push(['ϵ', end]); + break; + case 'text': + start.edges.push([node.text, end]); + break; + case 'cat': + last = start; + for (i = 0; i < node.parts.length - 1; i += 1) { + temp = {'type': '', 'edges': []}; + count = generateGraph(node.parts[i], last, temp, count); + last = temp; + } + count = generateGraph(node.parts[node.parts.length - 1], last, end, count); + break; + case 'or': + for (i = 0; i < node.parts.length; i += 1) { + tempStart = {'type': '', 'edges': []}; + tempEnd = {'type': '', 'edges': [['ϵ', end]]}; + start.edges.push(['ϵ', tempStart]); + count = generateGraph(node.parts[i], tempStart, tempEnd, count); + } + break; + case 'star': + tempStart = {'type': '', 'edges': []}; + tempEnd = {'type': '', 'edges': [['ϵ', tempStart], ['ϵ', end]]}; + start.edges.push(['ϵ', tempStart]); + start.edges.push(['ϵ', end]); + count = generateGraph(node.sub, tempStart, tempEnd, count); + break; + } + if (!end.hasOwnProperty('id')) { + end.id = count; + count += 1; + } + return count; } var ast = parseRegex(text), - start = { type: "start", edges: [] }, - accept = { type: "accept", edges: [] }; - if (typeof ast === "string") { - return ast; + start = {'type': 'start', 'edges': []}, + accept = {'type': 'accept', 'edges': []}; + if (typeof ast === 'string') { + return ast; } generateGraph(ast, start, accept, 0); return start; } /** - * Convert nondeterministic finite automaton to deterministic finite automaton. - * - * @param {object} nfa @see regexToNfa(), the function assumes that the given NFA is valid. - * @return {object} dfa Returns the first element of the DFA. - */ +* Convert nondeterministic finite automaton to deterministic finite automaton. +* +* @param {object} nfa @see regexToNfa(), the function assumes that the given NFA is valid. +* @return {object} dfa Returns the first element of the DFA. +*/ function nfaToDfa(nfa) { - "use strict"; + 'use strict'; function getClosure(nodes) { - var i, - closure = [], - stack = [], - symbols = [], - type = "", - top; - for (i = 0; i < nodes.length; i += 1) { - stack.push(nodes[i]); - closure.push(nodes[i]); - if (nodes[i].type === "accept") { - type = "accept"; - } - } - while (stack.length > 0) { - top = stack.pop(); - // If top is of type string and starts with "Error" then return error - if (typeof top === "string" && top[0] === "E") { - console.log(top); - continue; + var i, + closure = [], + stack = [], + symbols = [], + type = '', + top; + for (i = 0; i < nodes.length; i += 1) { + stack.push(nodes[i]); + closure.push(nodes[i]); + if (nodes[i].type === 'accept') { + type = 'accept'; + } } - for (i = 0; i < top.edges.length; i += 1) { - if (top.edges[i][0] === "ϵ") { - if (closure.indexOf(top.edges[i][1]) < 0) { - stack.push(top.edges[i][1]); - closure.push(top.edges[i][1]); - if (top.edges[i][1].type === "accept") { - type = "accept"; - } + while (stack.length > 0) { + top = stack.pop(); + // If top is of type string and starts with "Error" then return error + if (typeof top === 'string' && top[0] === 'E') { + console.log(top); + continue; } - } else { - if (symbols.indexOf(top.edges[i][0]) < 0) { - symbols.push(top.edges[i][0]); + for (i = 0; i < top.edges.length; i += 1) { + if (top.edges[i][0] === 'ϵ') { + if (closure.indexOf(top.edges[i][1]) < 0) { + stack.push(top.edges[i][1]); + closure.push(top.edges[i][1]); + if (top.edges[i][1].type === 'accept') { + type = 'accept'; + } + } + } else { + if (symbols.indexOf(top.edges[i][0]) < 0) { + symbols.push(top.edges[i][0]); + } + } } - } } - } - closure.sort(function (a, b) { - return a.id - b.id; - }); - symbols.sort(); - return { - key: closure - .map(function (x) { - return x.id; - }) - .join(","), - items: closure, - symbols: symbols, - type: type, - edges: [], - trans: {}, - }; + closure.sort(function (a, b) { + return a.id - b.id; + }); + symbols.sort(); + return { + 'key': closure.map(function (x) { + return x.id; + }).join(','), + 'items': closure, + 'symbols': symbols, + 'type': type, + 'edges': [], + 'trans': {} + }; } function getClosedMove(closure, symbol) { - var i, - j, - node, - nexts = []; - for (i = 0; i < closure.items.length; i += 1) { - node = closure.items[i]; - for (j = 0; j < node.edges.length; j += 1) { - if (symbol === node.edges[j][0]) { - if (nexts.indexOf(node.edges[j][1]) < 0) { - nexts.push(node.edges[j][1]); + var i, + j, + node, + nexts = []; + for (i = 0; i < closure.items.length; i += 1) { + node = closure.items[i]; + for (j = 0; j < node.edges.length; j += 1) { + if (symbol === node.edges[j][0]) { + if (nexts.indexOf(node.edges[j][1]) < 0) { + nexts.push(node.edges[j][1]); + } + } } - } } - } - return getClosure(nexts); + return getClosure(nexts); } function toAlphaCount(n) { - var a = "A".charCodeAt(0), - z = "Z".charCodeAt(0), - len = z - a + 1, - s = ""; - while (n >= 0) { - s = String.fromCharCode((n % len) + a) + s; - n = Math.floor(n / len) - 1; - } - return s; + var a = 'A'.charCodeAt(0), + z = 'Z'.charCodeAt(0), + len = z - a + 1, + s = ''; + while (n >= 0) { + s = String.fromCharCode(n % len + a) + s; + n = Math.floor(n / len) - 1; + } + return s; } var i, - first = getClosure([nfa]), - states = {}, - front = 0, - top, - closure, - queue = [first], - count = 0; + first = getClosure([nfa]), + states = {}, + front = 0, + top, + closure, + queue = [first], + count = 0; first.id = toAlphaCount(count); states[first.key] = first; while (front < queue.length) { - top = queue[front]; - front += 1; - for (i = 0; i < top.symbols.length; i += 1) { - closure = getClosedMove(top, top.symbols[i]); - if (!states.hasOwnProperty(closure.key)) { - count += 1; - closure.id = toAlphaCount(count); - states[closure.key] = closure; - queue.push(closure); + top = queue[front]; + front += 1; + for (i = 0; i < top.symbols.length; i += 1) { + closure = getClosedMove(top, top.symbols[i]); + if (!states.hasOwnProperty(closure.key)) { + count += 1; + closure.id = toAlphaCount(count); + states[closure.key] = closure; + queue.push(closure); + } + top.trans[top.symbols[i]] = states[closure.key]; + top.edges.push([top.symbols[i], states[closure.key]]); } - top.trans[top.symbols[i]] = states[closure.key]; - top.edges.push([top.symbols[i], states[closure.key]]); - } } return first; } /** - * Convert the DFA to its minimum form using Hopcroft's algorithm. - * - * @param {object} dfa @see nfaToDfa(), the function assumes that the given DFA is valid. - * @return {object} dfa Returns the first element of the minimum DFA. - */ +* Convert the DFA to its minimum form using Hopcroft's algorithm. +* +* @param {object} dfa @see nfaToDfa(), the function assumes that the given DFA is valid. +* @return {object} dfa Returns the first element of the minimum DFA. +*/ function minDfa(dfa) { - "use strict"; + 'use strict'; function getReverseEdges(start) { - var i, - top, - symbol, - next, - front = 0, - queue = [start], - visited = {}, - symbols = {}, // The input alphabet - idMap = {}, // Map id to states - revEdges = {}; // Map id to the ids which connects to the id with an alphabet - visited[start.id] = true; - while (front < queue.length) { - top = queue[front]; - front += 1; - idMap[top.id] = top; - for (i = 0; i < top.symbols.length; i += 1) { - symbol = top.symbols[i]; - if (!symbols.hasOwnProperty(symbol)) { - symbols[symbol] = true; - } - next = top.trans[symbol]; - if (!revEdges.hasOwnProperty(next.id)) { - revEdges[next.id] = {}; - } - if (!revEdges[next.id].hasOwnProperty(symbol)) { - revEdges[next.id][symbol] = []; - } - revEdges[next.id][symbol].push(top.id); - if (!visited.hasOwnProperty(next.id)) { - visited[next.id] = true; - queue.push(next); - } + var i, top, symbol, next, + front = 0, + queue = [start], + visited = {}, + symbols = {}, // The input alphabet + idMap = {}, // Map id to states + revEdges = {}; // Map id to the ids which connects to the id with an alphabet + visited[start.id] = true; + while (front < queue.length) { + top = queue[front]; + front += 1; + idMap[top.id] = top; + for (i = 0; i < top.symbols.length; i += 1) { + symbol = top.symbols[i]; + if (!symbols.hasOwnProperty(symbol)) { + symbols[symbol] = true; + } + next = top.trans[symbol]; + if (!revEdges.hasOwnProperty(next.id)) { + revEdges[next.id] = {}; + } + if (!revEdges[next.id].hasOwnProperty(symbol)) { + revEdges[next.id][symbol] = []; + } + revEdges[next.id][symbol].push(top.id); + if (!visited.hasOwnProperty(next.id)) { + visited[next.id] = true; + queue.push(next); + } + } } - } - return [Object.keys(symbols), idMap, revEdges]; + return [Object.keys(symbols), idMap, revEdges]; } function hopcroft(symbols, idMap, revEdges) { - var i, - j, - k, - keys, - key, - key1, - key2, - top, - group1, - group2, - symbol, - revGroup, - ids = Object.keys(idMap).sort(), - partitions = {}, - front = 0, - queue = [], - visited = {}; - group1 = []; - group2 = []; - for (i = 0; i < ids.length; i += 1) { - if (idMap[ids[i]].type === "accept") { - group1.push(ids[i]); - } else { - group2.push(ids[i]); + var i, j, k, keys, key, key1, key2, top, group1, group2, symbol, revGroup, + ids = Object.keys(idMap).sort(), + partitions = {}, + front = 0, + queue = [], + visited = {}; + group1 = []; + group2 = []; + for (i = 0; i < ids.length; i += 1) { + if (idMap[ids[i]].type === 'accept') { + group1.push(ids[i]); + } else { + group2.push(ids[i]); + } } - } - key = group1.join(","); - partitions[key] = group1; - queue.push(key); - visited[key] = 0; - if (group2.length !== 0) { - key = group2.join(","); - partitions[key] = group2; + key = group1.join(','); + partitions[key] = group1; queue.push(key); - } - while (front < queue.length) { - top = queue[front]; - front += 1; - if (top) { - top = top.split(","); - for (i = 0; i < symbols.length; i += 1) { - symbol = symbols[i]; - revGroup = {}; - for (j = 0; j < top.length; j += 1) { - if (revEdges.hasOwnProperty(top[j]) && revEdges[top[j]].hasOwnProperty(symbol)) { - for (k = 0; k < revEdges[top[j]][symbol].length; k += 1) { - revGroup[revEdges[top[j]][symbol][k]] = true; - } - } - } - keys = Object.keys(partitions); - for (j = 0; j < keys.length; j += 1) { - key = keys[j]; - group1 = []; - group2 = []; - for (k = 0; k < partitions[key].length; k += 1) { - if (revGroup.hasOwnProperty(partitions[key][k])) { - group1.push(partitions[key][k]); - } else { - group2.push(partitions[key][k]); - } - } - if (group1.length !== 0 && group2.length !== 0) { - delete partitions[key]; - key1 = group1.join(","); - key2 = group2.join(","); - partitions[key1] = group1; - partitions[key2] = group2; - if (visited.hasOwnProperty(key1)) { - queue[visited[key1]] = null; - visited[key1] = queue.length; - queue.push(key1); - visited[key2] = queue.length; - queue.push(key2); - } else if (group1.length <= group2.length) { - visited[key1] = queue.length; - queue.push(key1); - } else { - visited[key2] = queue.length; - queue.push(key2); + visited[key] = 0; + if (group2.length !== 0) { + key = group2.join(','); + partitions[key] = group2; + queue.push(key); + } + while (front < queue.length) { + top = queue[front]; + front += 1; + if (top) { + top = top.split(','); + for (i = 0; i < symbols.length; i += 1) { + symbol = symbols[i]; + revGroup = {}; + for (j = 0; j < top.length; j += 1) { + if (revEdges.hasOwnProperty(top[j]) && revEdges[top[j]].hasOwnProperty(symbol)) { + for (k = 0; k < revEdges[top[j]][symbol].length; k += 1) { + revGroup[revEdges[top[j]][symbol][k]] = true; + } + } + } + keys = Object.keys(partitions); + for (j = 0; j < keys.length; j += 1) { + key = keys[j]; + group1 = []; + group2 = []; + for (k = 0; k < partitions[key].length; k += 1) { + if (revGroup.hasOwnProperty(partitions[key][k])) { + group1.push(partitions[key][k]); + } else { + group2.push(partitions[key][k]); + } + } + if (group1.length !== 0 && group2.length !== 0) { + delete partitions[key]; + key1 = group1.join(','); + key2 = group2.join(','); + partitions[key1] = group1; + partitions[key2] = group2; + if (visited.hasOwnProperty(key1)) { + queue[visited[key1]] = null; + visited[key1] = queue.length; + queue.push(key1); + visited[key2] = queue.length; + queue.push(key2); + } else if (group1.length <= group2.length) { + visited[key1] = queue.length; + queue.push(key1); + } else { + visited[key2] = queue.length; + queue.push(key2); + } + } + } } - } } - } } - } - return Object.values(partitions); + return Object.values(partitions); } function buildMinNfa(start, partitions, idMap, revEdges) { - var i, - j, - temp, - node, - symbol, - nodes = [], - group = {}, - edges = {}; - partitions.sort(function (a, b) { - var ka = a.join(","), - kb = b.join(","); - if (ka < kb) { - return -1; - } - if (ka > kb) { - return 1; - } - return 0; - }); - for (i = 0; i < partitions.length; i += 1) { - if (partitions[i].indexOf(start.id) >= 0) { - if (i > 0) { - temp = partitions[i]; - partitions[i] = partitions[0]; - partitions[0] = temp; - } - break; - } - } - for (i = 0; i < partitions.length; i += 1) { - node = { - id: (i + 1).toString(), - key: partitions[i].join(","), - items: [], - symbols: [], - type: idMap[partitions[i][0]].type, - edges: [], - trans: {}, - }; - for (j = 0; j < partitions[i].length; j += 1) { - node.items.push(idMap[partitions[i][j]]); - group[partitions[i][j]] = i; + var i, j, temp, node, symbol, + nodes = [], + group = {}, + edges = {}; + partitions.sort(function (a, b) { + var ka = a.join(','), kb = b.join(','); + if (ka < kb) { + return -1; + } + if (ka > kb) { + return 1; + } + return 0; + }); + for (i = 0; i < partitions.length; i += 1) { + if (partitions[i].indexOf(start.id) >= 0) { + if (i > 0) { + temp = partitions[i]; + partitions[i] = partitions[0]; + partitions[0] = temp; + } + break; + } } - edges[i] = {}; - nodes.push(node); - } - Object.keys(revEdges).forEach(function (to) { - Object.keys(revEdges[to]).forEach(function (symbol) { - revEdges[to][symbol].forEach(function (from) { - if (!edges[group[from]].hasOwnProperty(group[to])) { - edges[group[from]][group[to]] = {}; + for (i = 0; i < partitions.length; i += 1) { + node = { + id: (i + 1).toString(), + key: partitions[i].join(','), + items: [], + symbols: [], + type: idMap[partitions[i][0]].type, + edges: [], + trans: {}, + }; + for (j = 0; j < partitions[i].length; j += 1) { + node.items.push(idMap[partitions[i][j]]); + group[partitions[i][j]] = i; } - edges[group[from]][group[to]][symbol] = true; - }); + edges[i] = {}; + nodes.push(node); + } + Object.keys(revEdges).forEach(function (to) { + Object.keys(revEdges[to]).forEach(function (symbol) { + revEdges[to][symbol].forEach(function (from) { + if (!edges[group[from]].hasOwnProperty(group[to])) { + edges[group[from]][group[to]] = {}; + } + edges[group[from]][group[to]][symbol] = true; + }); + }); }); - }); - Object.keys(edges).forEach(function (from) { - Object.keys(edges[from]).forEach(function (to) { - symbol = JSON.stringify(Object.keys(edges[from][to]).sort()); - nodes[from].symbols.push(symbol); - nodes[from].edges.push([symbol, nodes[to]]); - nodes[from].trans[symbol] = nodes[to]; + Object.keys(edges).forEach(function (from) { + Object.keys(edges[from]).forEach(function (to) { + symbol = JSON.stringify(Object.keys(edges[from][to]).sort()); + nodes[from].symbols.push(symbol); + nodes[from].edges.push([symbol, nodes[to]]); + nodes[from].trans[symbol] = nodes[to]; + }); }); - }); - return nodes[0]; + return nodes[0]; } var edgesTuple = getReverseEdges(dfa), - symbols = edgesTuple[0], - idMap = edgesTuple[1], - revEdges = edgesTuple[2], - partitions = hopcroft(symbols, idMap, revEdges); + symbols = edgesTuple[0], + idMap = edgesTuple[1], + revEdges = edgesTuple[2], + partitions = hopcroft(symbols, idMap, revEdges); return buildMinNfa(dfa, partitions, idMap, revEdges); } @@ -552,4 +526,4 @@ if (typeof require === "function") { exports.regexToNfa = regexToNfa; exports.nfaToDfa = nfaToDfa; exports.minDfa = minDfa; -} +} \ No newline at end of file From 783d28bc104d2e2b919f4e4a8acaa0d76572bb05 Mon Sep 17 00:00:00 2001 From: javiersuweijie Date: Thu, 21 Sep 2023 17:11:29 +0800 Subject: [PATCH 3/3] fix linting --- compiler/gen.js | 1 - compiler/lexical.js | 950 +++++++++++++++++++-------------------- compiler/regex_to_dfa.js | 329 +++++++------- 3 files changed, 629 insertions(+), 651 deletions(-) diff --git a/compiler/gen.js b/compiler/gen.js index 9f4f888..8990bee 100644 --- a/compiler/gen.js +++ b/compiler/gen.js @@ -1,6 +1,5 @@ const fs = require('fs'); const path = require('path'); -const regexpTree = require('regexp-tree'); const assert = require('assert'); const lexical = require('./lexical'); const regex_to_dfa = require('./regex_to_dfa'); diff --git a/compiler/lexical.js b/compiler/lexical.js index 0e21a1b..b165530 100644 --- a/compiler/lexical.js +++ b/compiler/lexical.js @@ -21,136 +21,136 @@ * otherwise returns an object which is the syntax tree. */ function parseRegex(text) { - 'use strict'; - function parseSub(text, begin, end, first) { - var i, - sub, - last = 0, - node = {'begin': begin, 'end': end}, - virNode, - tempNode, - stack = 0, - parts = []; - if (text.length === 0) { - return 'Error: empty input at ' + begin + '.'; - } - if (first) { - for (i = 0; i <= text.length; i += 1) { - if (i === text.length || (text[i] === '|' && stack === 0)) { - if (last === 0 && i === text.length) { - return parseSub(text, begin + last, begin + i, false); - } - sub = parseSub(text.slice(last, i), begin + last, begin + i, true); - if (typeof sub === 'string') { - return sub; - } - parts.push(sub); - last = i + 1; - } else if (text[i] === '(') { - stack += 1; - } else if (text[i] === ')') { - stack -= 1; - } - } - if (parts.length === 1) { - return parts[0]; - } - node.type = 'or'; - node.parts = parts; - } else { - for (i = 0; i < text.length; i += 1) { - if (text[i] === '(') { - last = i + 1; - i += 1; - stack = 1; - while (i < text.length && stack !== 0) { - if (text[i] === '(') { - stack += 1; - } else if (text[i] === ')') { - stack -= 1; - } - i += 1; - } - if (stack !== 0) { - return 'Error: missing right bracket for ' + (begin + last) + '.'; - } - i -= 1; - sub = parseSub(text.slice(last, i), begin + last, begin + i, true); - if (typeof sub === 'string') { - return sub; - } - sub.begin -= 1; - sub.end += 1; - parts.push(sub); - } else if (text[i] === '*') { - if (parts.length === 0) { - return 'Error: unexpected * at ' + (begin + i) + '.'; - } - tempNode = {'begin': parts[parts.length - 1].begin, 'end': parts[parts.length - 1].end + 1}; - tempNode.type = 'star'; - tempNode.sub = parts[parts.length - 1]; - parts[parts.length - 1] = tempNode; - } else if (text[i] === '+') { - if (parts.length === 0) { - return 'Error: unexpected + at ' + (begin + i) + '.'; - } - virNode = {'begin': parts[parts.length - 1].begin, 'end': parts[parts.length - 1].end + 1}; - virNode.type = 'star'; - virNode.sub = parts[parts.length - 1]; - tempNode = {'begin': parts[parts.length - 1].begin, 'end': parts[parts.length - 1].end + 1}; - tempNode.type = 'cat'; - tempNode.parts = [parts[parts.length - 1], virNode]; - parts[parts.length - 1] = tempNode; - } else if (text[i] === '?') { - if (parts.length === 0) { - return 'Error: unexpected + at ' + (begin + i) + '.'; - } - virNode = {'begin': parts[parts.length - 1].begin, 'end': parts[parts.length - 1].end + 1}; - virNode.type = 'empty'; - virNode.sub = parts[parts.length - 1]; - tempNode = {'begin': parts[parts.length - 1].begin, 'end': parts[parts.length - 1].end + 1}; - tempNode.type = 'or'; - tempNode.parts = [parts[parts.length - 1], virNode]; - parts[parts.length - 1] = tempNode; - } else if (text[i] === 'ϵ') { - tempNode = {'begin': begin + i, 'end': begin + i + 1}; - tempNode.type = 'empty'; - parts.push(tempNode); - } else if (Array.isArray(text[i])) { - tempNode = {'begin': begin + i, 'end': begin + i + 1}; - tempNode.type = 'text'; - tempNode.text = text[i][0]; - parts.push(tempNode); - } else { - tempNode = {'begin': begin + i, 'end': begin + i + 1}; - tempNode.type = 'text'; - tempNode.text = text[i]; - parts.push(tempNode); - } - } - if (parts.length === 1) { - return parts[0]; - } - node.type = 'cat'; - node.parts = parts; - } - return node; - } + 'use strict'; + function parseSub(text, begin, end, first) { + var i, + sub, + last = 0, + node = {'begin': begin, 'end': end}, + virNode, + tempNode, + stack = 0, + parts = []; + if (text.length === 0) { + return 'Error: empty input at ' + begin + '.'; + } + if (first) { + for (i = 0; i <= text.length; i += 1) { + if (i === text.length || (text[i] === '|' && stack === 0)) { + if (last === 0 && i === text.length) { + return parseSub(text, begin + last, begin + i, false); + } + sub = parseSub(text.slice(last, i), begin + last, begin + i, true); + if (typeof sub === 'string') { + return sub; + } + parts.push(sub); + last = i + 1; + } else if (text[i] === '(') { + stack += 1; + } else if (text[i] === ')') { + stack -= 1; + } + } + if (parts.length === 1) { + return parts[0]; + } + node.type = 'or'; + node.parts = parts; + } else { + for (i = 0; i < text.length; i += 1) { + if (text[i] === '(') { + last = i + 1; + i += 1; + stack = 1; + while (i < text.length && stack !== 0) { + if (text[i] === '(') { + stack += 1; + } else if (text[i] === ')') { + stack -= 1; + } + i += 1; + } + if (stack !== 0) { + return 'Error: missing right bracket for ' + (begin + last) + '.'; + } + i -= 1; + sub = parseSub(text.slice(last, i), begin + last, begin + i, true); + if (typeof sub === 'string') { + return sub; + } + sub.begin -= 1; + sub.end += 1; + parts.push(sub); + } else if (text[i] === '*') { + if (parts.length === 0) { + return 'Error: unexpected * at ' + (begin + i) + '.'; + } + tempNode = {'begin': parts[parts.length - 1].begin, 'end': parts[parts.length - 1].end + 1}; + tempNode.type = 'star'; + tempNode.sub = parts[parts.length - 1]; + parts[parts.length - 1] = tempNode; + } else if (text[i] === '+') { + if (parts.length === 0) { + return 'Error: unexpected + at ' + (begin + i) + '.'; + } + virNode = {'begin': parts[parts.length - 1].begin, 'end': parts[parts.length - 1].end + 1}; + virNode.type = 'star'; + virNode.sub = parts[parts.length - 1]; + tempNode = {'begin': parts[parts.length - 1].begin, 'end': parts[parts.length - 1].end + 1}; + tempNode.type = 'cat'; + tempNode.parts = [parts[parts.length - 1], virNode]; + parts[parts.length - 1] = tempNode; + } else if (text[i] === '?') { + if (parts.length === 0) { + return 'Error: unexpected + at ' + (begin + i) + '.'; + } + virNode = {'begin': parts[parts.length - 1].begin, 'end': parts[parts.length - 1].end + 1}; + virNode.type = 'empty'; + virNode.sub = parts[parts.length - 1]; + tempNode = {'begin': parts[parts.length - 1].begin, 'end': parts[parts.length - 1].end + 1}; + tempNode.type = 'or'; + tempNode.parts = [parts[parts.length - 1], virNode]; + parts[parts.length - 1] = tempNode; + } else if (text[i] === 'ϵ') { + tempNode = {'begin': begin + i, 'end': begin + i + 1}; + tempNode.type = 'empty'; + parts.push(tempNode); + } else if (Array.isArray(text[i])) { + tempNode = {'begin': begin + i, 'end': begin + i + 1}; + tempNode.type = 'text'; + tempNode.text = text[i][0]; + parts.push(tempNode); + } else { + tempNode = {'begin': begin + i, 'end': begin + i + 1}; + tempNode.type = 'text'; + tempNode.text = text[i]; + parts.push(tempNode); + } + } + if (parts.length === 1) { + return parts[0]; + } + node.type = 'cat'; + node.parts = parts; + } + return node; + } - let new_text = []; - let i = 0; - while (i < text.length) { - if (text[i] == '\\') { - const escapeMap = { n: '\n', r: '\r', t: '\t', v: '\v', f: '\f', '^': String.fromCharCode(128) }; - const char = text[i + 1]; - new_text.push([escapeMap[char] || char]); - i += 2; - } else { - new_text.push(text[i]); - i += 1; - } - } - return parseSub(new_text, 0, new_text.length, true); + let new_text = []; + let i = 0; + while (i < text.length) { + if (text[i] == '\\') { + const escapeMap = { n: '\n', r: '\r', t: '\t', v: '\v', f: '\f', '^': String.fromCharCode(128) }; + const char = text[i + 1]; + new_text.push([escapeMap[char] || char]); + i += 2; + } else { + new_text.push(text[i]); + i += 1; + } + } + return parseSub(new_text, 0, new_text.length, true); } /** @@ -160,59 +160,59 @@ function parseRegex(text) { * @return {object|string} */ function regexToNfa(text) { - 'use strict'; - function generateGraph(node, start, end, count) { - var i, last, temp, tempStart, tempEnd; - if (!start.hasOwnProperty('id')) { - start.id = count; - count += 1; - } - switch (node.type) { - case 'empty': - start.edges.push(['ϵ', end]); - break; - case 'text': - start.edges.push([node.text, end]); - break; - case 'cat': - last = start; - for (i = 0; i < node.parts.length - 1; i += 1) { - temp = {'type': '', 'edges': []}; - count = generateGraph(node.parts[i], last, temp, count); - last = temp; - } - count = generateGraph(node.parts[node.parts.length - 1], last, end, count); - break; - case 'or': - for (i = 0; i < node.parts.length; i += 1) { - tempStart = {'type': '', 'edges': []}; - tempEnd = {'type': '', 'edges': [['ϵ', end]]}; - start.edges.push(['ϵ', tempStart]); - count = generateGraph(node.parts[i], tempStart, tempEnd, count); - } - break; - case 'star': - tempStart = {'type': '', 'edges': []}; - tempEnd = {'type': '', 'edges': [['ϵ', tempStart], ['ϵ', end]]}; - start.edges.push(['ϵ', tempStart]); - start.edges.push(['ϵ', end]); - count = generateGraph(node.sub, tempStart, tempEnd, count); - break; - } - if (!end.hasOwnProperty('id')) { - end.id = count; - count += 1; - } - return count; - } - var ast = parseRegex(text), - start = {'type': 'start', 'edges': []}, - accept = {'type': 'accept', 'edges': []}; - if (typeof ast === 'string') { - return ast; - } - generateGraph(ast, start, accept, 0); - return start; + 'use strict'; + function generateGraph(node, start, end, count) { + var i, last, temp, tempStart, tempEnd; + if (!start.hasOwnProperty('id')) { + start.id = count; + count += 1; + } + switch (node.type) { + case 'empty': + start.edges.push(['ϵ', end]); + break; + case 'text': + start.edges.push([node.text, end]); + break; + case 'cat': + last = start; + for (i = 0; i < node.parts.length - 1; i += 1) { + temp = {'type': '', 'edges': []}; + count = generateGraph(node.parts[i], last, temp, count); + last = temp; + } + count = generateGraph(node.parts[node.parts.length - 1], last, end, count); + break; + case 'or': + for (i = 0; i < node.parts.length; i += 1) { + tempStart = {'type': '', 'edges': []}; + tempEnd = {'type': '', 'edges': [['ϵ', end]]}; + start.edges.push(['ϵ', tempStart]); + count = generateGraph(node.parts[i], tempStart, tempEnd, count); + } + break; + case 'star': + tempStart = {'type': '', 'edges': []}; + tempEnd = {'type': '', 'edges': [['ϵ', tempStart], ['ϵ', end]]}; + start.edges.push(['ϵ', tempStart]); + start.edges.push(['ϵ', end]); + count = generateGraph(node.sub, tempStart, tempEnd, count); + break; + } + if (!end.hasOwnProperty('id')) { + end.id = count; + count += 1; + } + return count; + } + var ast = parseRegex(text), + start = {'type': 'start', 'edges': []}, + accept = {'type': 'accept', 'edges': []}; + if (typeof ast === 'string') { + return ast; + } + generateGraph(ast, start, accept, 0); + return start; } /** @@ -222,113 +222,113 @@ function regexToNfa(text) { * @return {object} dfa Returns the first element of the DFA. */ function nfaToDfa(nfa) { - 'use strict'; - function getClosure(nodes) { - var i, - closure = [], - stack = [], - symbols = [], - type = '', - top; - for (i = 0; i < nodes.length; i += 1) { - stack.push(nodes[i]); - closure.push(nodes[i]); - if (nodes[i].type === 'accept') { - type = 'accept'; - } - } - while (stack.length > 0) { - top = stack.pop(); - // If top is of type string and starts with "Error" then return error - if (typeof top === 'string' && top[0] === 'E') { - console.log(top); - continue; - } - for (i = 0; i < top.edges.length; i += 1) { - if (top.edges[i][0] === 'ϵ') { - if (closure.indexOf(top.edges[i][1]) < 0) { - stack.push(top.edges[i][1]); - closure.push(top.edges[i][1]); - if (top.edges[i][1].type === 'accept') { - type = 'accept'; - } - } - } else { - if (symbols.indexOf(top.edges[i][0]) < 0) { - symbols.push(top.edges[i][0]); - } - } - } - } - closure.sort(function (a, b) { - return a.id - b.id; - }); - symbols.sort(); - return { - 'key': closure.map(function (x) { - return x.id; - }).join(','), - 'items': closure, - 'symbols': symbols, - 'type': type, - 'edges': [], - 'trans': {} - }; - } - function getClosedMove(closure, symbol) { - var i, - j, - node, - nexts = []; - for (i = 0; i < closure.items.length; i += 1) { - node = closure.items[i]; - for (j = 0; j < node.edges.length; j += 1) { - if (symbol === node.edges[j][0]) { - if (nexts.indexOf(node.edges[j][1]) < 0) { - nexts.push(node.edges[j][1]); - } - } - } - } - return getClosure(nexts); - } - function toAlphaCount(n) { - var a = 'A'.charCodeAt(0), - z = 'Z'.charCodeAt(0), - len = z - a + 1, - s = ''; - while (n >= 0) { - s = String.fromCharCode(n % len + a) + s; - n = Math.floor(n / len) - 1; - } - return s; - } - var i, - first = getClosure([nfa]), - states = {}, - front = 0, - top, - closure, - queue = [first], - count = 0; - first.id = toAlphaCount(count); - states[first.key] = first; - while (front < queue.length) { - top = queue[front]; - front += 1; - for (i = 0; i < top.symbols.length; i += 1) { - closure = getClosedMove(top, top.symbols[i]); - if (!states.hasOwnProperty(closure.key)) { - count += 1; - closure.id = toAlphaCount(count); - states[closure.key] = closure; - queue.push(closure); - } - top.trans[top.symbols[i]] = states[closure.key]; - top.edges.push([top.symbols[i], states[closure.key]]); - } - } - return first; + 'use strict'; + function getClosure(nodes) { + var i, + closure = [], + stack = [], + symbols = [], + type = '', + top; + for (i = 0; i < nodes.length; i += 1) { + stack.push(nodes[i]); + closure.push(nodes[i]); + if (nodes[i].type === 'accept') { + type = 'accept'; + } + } + while (stack.length > 0) { + top = stack.pop(); + // If top is of type string and starts with "Error" then return error + if (typeof top === 'string' && top[0] === 'E') { + console.log(top); + continue; + } + for (i = 0; i < top.edges.length; i += 1) { + if (top.edges[i][0] === 'ϵ') { + if (closure.indexOf(top.edges[i][1]) < 0) { + stack.push(top.edges[i][1]); + closure.push(top.edges[i][1]); + if (top.edges[i][1].type === 'accept') { + type = 'accept'; + } + } + } else { + if (symbols.indexOf(top.edges[i][0]) < 0) { + symbols.push(top.edges[i][0]); + } + } + } + } + closure.sort(function (a, b) { + return a.id - b.id; + }); + symbols.sort(); + return { + 'key': closure.map(function (x) { + return x.id; + }).join(','), + 'items': closure, + 'symbols': symbols, + 'type': type, + 'edges': [], + 'trans': {} + }; + } + function getClosedMove(closure, symbol) { + var i, + j, + node, + nexts = []; + for (i = 0; i < closure.items.length; i += 1) { + node = closure.items[i]; + for (j = 0; j < node.edges.length; j += 1) { + if (symbol === node.edges[j][0]) { + if (nexts.indexOf(node.edges[j][1]) < 0) { + nexts.push(node.edges[j][1]); + } + } + } + } + return getClosure(nexts); + } + function toAlphaCount(n) { + var a = 'A'.charCodeAt(0), + z = 'Z'.charCodeAt(0), + len = z - a + 1, + s = ''; + while (n >= 0) { + s = String.fromCharCode(n % len + a) + s; + n = Math.floor(n / len) - 1; + } + return s; + } + var i, + first = getClosure([nfa]), + states = {}, + front = 0, + top, + closure, + queue = [first], + count = 0; + first.id = toAlphaCount(count); + states[first.key] = first; + while (front < queue.length) { + top = queue[front]; + front += 1; + for (i = 0; i < top.symbols.length; i += 1) { + closure = getClosedMove(top, top.symbols[i]); + if (!states.hasOwnProperty(closure.key)) { + count += 1; + closure.id = toAlphaCount(count); + states[closure.key] = closure; + queue.push(closure); + } + top.trans[top.symbols[i]] = states[closure.key]; + top.edges.push([top.symbols[i], states[closure.key]]); + } + } + return first; } /** @@ -338,192 +338,192 @@ function nfaToDfa(nfa) { * @return {object} dfa Returns the first element of the minimum DFA. */ function minDfa(dfa) { - 'use strict'; - function getReverseEdges(start) { - var i, top, symbol, next, - front = 0, - queue = [start], - visited = {}, - symbols = {}, // The input alphabet - idMap = {}, // Map id to states - revEdges = {}; // Map id to the ids which connects to the id with an alphabet - visited[start.id] = true; - while (front < queue.length) { - top = queue[front]; - front += 1; - idMap[top.id] = top; - for (i = 0; i < top.symbols.length; i += 1) { - symbol = top.symbols[i]; - if (!symbols.hasOwnProperty(symbol)) { - symbols[symbol] = true; - } - next = top.trans[symbol]; - if (!revEdges.hasOwnProperty(next.id)) { - revEdges[next.id] = {}; - } - if (!revEdges[next.id].hasOwnProperty(symbol)) { - revEdges[next.id][symbol] = []; - } - revEdges[next.id][symbol].push(top.id); - if (!visited.hasOwnProperty(next.id)) { - visited[next.id] = true; - queue.push(next); - } - } - } - return [Object.keys(symbols), idMap, revEdges]; - } - function hopcroft(symbols, idMap, revEdges) { - var i, j, k, keys, key, key1, key2, top, group1, group2, symbol, revGroup, - ids = Object.keys(idMap).sort(), - partitions = {}, - front = 0, - queue = [], - visited = {}; - group1 = []; - group2 = []; - for (i = 0; i < ids.length; i += 1) { - if (idMap[ids[i]].type === 'accept') { - group1.push(ids[i]); - } else { - group2.push(ids[i]); - } - } - key = group1.join(','); - partitions[key] = group1; - queue.push(key); - visited[key] = 0; - if (group2.length !== 0) { - key = group2.join(','); - partitions[key] = group2; - queue.push(key); - } - while (front < queue.length) { - top = queue[front]; - front += 1; - if (top) { - top = top.split(','); - for (i = 0; i < symbols.length; i += 1) { - symbol = symbols[i]; - revGroup = {}; - for (j = 0; j < top.length; j += 1) { - if (revEdges.hasOwnProperty(top[j]) && revEdges[top[j]].hasOwnProperty(symbol)) { - for (k = 0; k < revEdges[top[j]][symbol].length; k += 1) { - revGroup[revEdges[top[j]][symbol][k]] = true; - } - } - } - keys = Object.keys(partitions); - for (j = 0; j < keys.length; j += 1) { - key = keys[j]; - group1 = []; - group2 = []; - for (k = 0; k < partitions[key].length; k += 1) { - if (revGroup.hasOwnProperty(partitions[key][k])) { - group1.push(partitions[key][k]); - } else { - group2.push(partitions[key][k]); - } - } - if (group1.length !== 0 && group2.length !== 0) { - delete partitions[key]; - key1 = group1.join(','); - key2 = group2.join(','); - partitions[key1] = group1; - partitions[key2] = group2; - if (visited.hasOwnProperty(key1)) { - queue[visited[key1]] = null; - visited[key1] = queue.length; - queue.push(key1); - visited[key2] = queue.length; - queue.push(key2); - } else if (group1.length <= group2.length) { - visited[key1] = queue.length; - queue.push(key1); - } else { - visited[key2] = queue.length; - queue.push(key2); - } - } - } - } - } - } - return Object.values(partitions); - } - function buildMinNfa(start, partitions, idMap, revEdges) { - var i, j, temp, node, symbol, - nodes = [], - group = {}, - edges = {}; - partitions.sort(function (a, b) { - var ka = a.join(','), kb = b.join(','); - if (ka < kb) { - return -1; - } - if (ka > kb) { - return 1; - } - return 0; - }); - for (i = 0; i < partitions.length; i += 1) { - if (partitions[i].indexOf(start.id) >= 0) { - if (i > 0) { - temp = partitions[i]; - partitions[i] = partitions[0]; - partitions[0] = temp; - } - break; - } - } - for (i = 0; i < partitions.length; i += 1) { - node = { - id: (i + 1).toString(), - key: partitions[i].join(','), - items: [], - symbols: [], - type: idMap[partitions[i][0]].type, - edges: [], - trans: {}, - }; - for (j = 0; j < partitions[i].length; j += 1) { - node.items.push(idMap[partitions[i][j]]); - group[partitions[i][j]] = i; - } - edges[i] = {}; - nodes.push(node); - } - Object.keys(revEdges).forEach(function (to) { - Object.keys(revEdges[to]).forEach(function (symbol) { - revEdges[to][symbol].forEach(function (from) { - if (!edges[group[from]].hasOwnProperty(group[to])) { - edges[group[from]][group[to]] = {}; - } - edges[group[from]][group[to]][symbol] = true; - }); - }); - }); - Object.keys(edges).forEach(function (from) { - Object.keys(edges[from]).forEach(function (to) { - symbol = JSON.stringify(Object.keys(edges[from][to]).sort()); - nodes[from].symbols.push(symbol); - nodes[from].edges.push([symbol, nodes[to]]); - nodes[from].trans[symbol] = nodes[to]; - }); - }); - return nodes[0]; - } - var edgesTuple = getReverseEdges(dfa), - symbols = edgesTuple[0], - idMap = edgesTuple[1], - revEdges = edgesTuple[2], - partitions = hopcroft(symbols, idMap, revEdges); - return buildMinNfa(dfa, partitions, idMap, revEdges); + 'use strict'; + function getReverseEdges(start) { + var i, top, symbol, next, + front = 0, + queue = [start], + visited = {}, + symbols = {}, // The input alphabet + idMap = {}, // Map id to states + revEdges = {}; // Map id to the ids which connects to the id with an alphabet + visited[start.id] = true; + while (front < queue.length) { + top = queue[front]; + front += 1; + idMap[top.id] = top; + for (i = 0; i < top.symbols.length; i += 1) { + symbol = top.symbols[i]; + if (!symbols.hasOwnProperty(symbol)) { + symbols[symbol] = true; + } + next = top.trans[symbol]; + if (!revEdges.hasOwnProperty(next.id)) { + revEdges[next.id] = {}; + } + if (!revEdges[next.id].hasOwnProperty(symbol)) { + revEdges[next.id][symbol] = []; + } + revEdges[next.id][symbol].push(top.id); + if (!visited.hasOwnProperty(next.id)) { + visited[next.id] = true; + queue.push(next); + } + } + } + return [Object.keys(symbols), idMap, revEdges]; + } + function hopcroft(symbols, idMap, revEdges) { + var i, j, k, keys, key, key1, key2, top, group1, group2, symbol, revGroup, + ids = Object.keys(idMap).sort(), + partitions = {}, + front = 0, + queue = [], + visited = {}; + group1 = []; + group2 = []; + for (i = 0; i < ids.length; i += 1) { + if (idMap[ids[i]].type === 'accept') { + group1.push(ids[i]); + } else { + group2.push(ids[i]); + } + } + key = group1.join(','); + partitions[key] = group1; + queue.push(key); + visited[key] = 0; + if (group2.length !== 0) { + key = group2.join(','); + partitions[key] = group2; + queue.push(key); + } + while (front < queue.length) { + top = queue[front]; + front += 1; + if (top) { + top = top.split(','); + for (i = 0; i < symbols.length; i += 1) { + symbol = symbols[i]; + revGroup = {}; + for (j = 0; j < top.length; j += 1) { + if (revEdges.hasOwnProperty(top[j]) && revEdges[top[j]].hasOwnProperty(symbol)) { + for (k = 0; k < revEdges[top[j]][symbol].length; k += 1) { + revGroup[revEdges[top[j]][symbol][k]] = true; + } + } + } + keys = Object.keys(partitions); + for (j = 0; j < keys.length; j += 1) { + key = keys[j]; + group1 = []; + group2 = []; + for (k = 0; k < partitions[key].length; k += 1) { + if (revGroup.hasOwnProperty(partitions[key][k])) { + group1.push(partitions[key][k]); + } else { + group2.push(partitions[key][k]); + } + } + if (group1.length !== 0 && group2.length !== 0) { + delete partitions[key]; + key1 = group1.join(','); + key2 = group2.join(','); + partitions[key1] = group1; + partitions[key2] = group2; + if (visited.hasOwnProperty(key1)) { + queue[visited[key1]] = null; + visited[key1] = queue.length; + queue.push(key1); + visited[key2] = queue.length; + queue.push(key2); + } else if (group1.length <= group2.length) { + visited[key1] = queue.length; + queue.push(key1); + } else { + visited[key2] = queue.length; + queue.push(key2); + } + } + } + } + } + } + return Object.values(partitions); + } + function buildMinNfa(start, partitions, idMap, revEdges) { + var i, j, temp, node, symbol, + nodes = [], + group = {}, + edges = {}; + partitions.sort(function (a, b) { + var ka = a.join(','), kb = b.join(','); + if (ka < kb) { + return -1; + } + if (ka > kb) { + return 1; + } + return 0; + }); + for (i = 0; i < partitions.length; i += 1) { + if (partitions[i].indexOf(start.id) >= 0) { + if (i > 0) { + temp = partitions[i]; + partitions[i] = partitions[0]; + partitions[0] = temp; + } + break; + } + } + for (i = 0; i < partitions.length; i += 1) { + node = { + id: (i + 1).toString(), + key: partitions[i].join(','), + items: [], + symbols: [], + type: idMap[partitions[i][0]].type, + edges: [], + trans: {}, + }; + for (j = 0; j < partitions[i].length; j += 1) { + node.items.push(idMap[partitions[i][j]]); + group[partitions[i][j]] = i; + } + edges[i] = {}; + nodes.push(node); + } + Object.keys(revEdges).forEach(function (to) { + Object.keys(revEdges[to]).forEach(function (symbol) { + revEdges[to][symbol].forEach(function (from) { + if (!edges[group[from]].hasOwnProperty(group[to])) { + edges[group[from]][group[to]] = {}; + } + edges[group[from]][group[to]][symbol] = true; + }); + }); + }); + Object.keys(edges).forEach(function (from) { + Object.keys(edges[from]).forEach(function (to) { + symbol = JSON.stringify(Object.keys(edges[from][to]).sort()); + nodes[from].symbols.push(symbol); + nodes[from].edges.push([symbol, nodes[to]]); + nodes[from].trans[symbol] = nodes[to]; + }); + }); + return nodes[0]; + } + var edgesTuple = getReverseEdges(dfa), + symbols = edgesTuple[0], + idMap = edgesTuple[1], + revEdges = edgesTuple[2], + partitions = hopcroft(symbols, idMap, revEdges); + return buildMinNfa(dfa, partitions, idMap, revEdges); } -if (typeof require === "function") { - exports.parseRegex = parseRegex; - exports.regexToNfa = regexToNfa; - exports.nfaToDfa = nfaToDfa; - exports.minDfa = minDfa; +if (typeof require === 'function') { + exports.parseRegex = parseRegex; + exports.regexToNfa = regexToNfa; + exports.nfaToDfa = nfaToDfa; + exports.minDfa = minDfa; } \ No newline at end of file diff --git a/compiler/regex_to_dfa.js b/compiler/regex_to_dfa.js index d7733fc..9fed7e4 100644 --- a/compiler/regex_to_dfa.js +++ b/compiler/regex_to_dfa.js @@ -1,6 +1,5 @@ /*jslint browser: true*/ -/*global require, exports*/ -const { minDfa, nfaToDfa, regexToNfa } = require("./lexical"); +const { minDfa, nfaToDfa, regexToNfa } = require('./lexical'); /** This section defines helper regex components -- to edit the regex used, edit the return * of the test_regex function. @@ -9,206 +8,186 @@ const { minDfa, nfaToDfa, regexToNfa } = require("./lexical"); // Helper components -const a2z_nosep = "abcdefghijklmnopqrstuvwxyz"; -const A2Z_nosep = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"; -const a2f_nosep = "abcdef"; -const A2F_nosep = "ABCDEF"; -const r0to9_nosep = "0123456789"; +const a2z_nosep = 'abcdefghijklmnopqrstuvwxyz'; +const A2Z_nosep = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'; +const a2f_nosep = 'abcdef'; +const A2F_nosep = 'ABCDEF'; +const r0to9_nosep = '0123456789'; // TODO: Note that this is replicated code in lexical.js as well // Note that ^ has to be manually replaced with \x80 in the regex -const escapeMap = { n: "\n", r: "\r", t: "\t", v: "\v", f: "\f" }; +const escapeMap = { n: '\n', r: '\r', t: '\t', v: '\v', f: '\f' }; let whitespace = Object.values(escapeMap); -const slash_s = whitespace.join("|"); - -// Escapes and prints regexes (might be buggy) -function format_regex_printable(s) { - const escaped_string_json = JSON.stringify(s); - const escaped_string = escaped_string_json.slice(1, escaped_string_json.length - 1); - return escaped_string - .replaceAll("\\\\\\\\", "\\") - .replaceAll("\\\\", "\\") - .replaceAll("\\|", "\\\\|") - .replaceAll("/", "\\/") - .replaceAll("\u000b", "\\♥") - .replaceAll("|[|", "|\\[|") - .replaceAll("|]|", "|\\]|") - .replaceAll("|.|", "|\\.|") - .replaceAll("|$|", "|\\$|") - .replaceAll("|^|", "|\\^|"); - // let escaped = escape_whitespace(escape_whitespace(s.replaceAll("\\\\", "ZZZZZZZ"))); - // let fixed = escaped.replaceAll("\\(", "(").replaceAll("\\)", ")").replaceAll("\\+", "+").replaceAll("\\*", "*").replaceAll("\\?", "?"); -} +const slash_s = whitespace.join('|'); // Note that this is not complete and very case specific i.e. can only handle a-z and a-f, and not a-c. // This function expands [] sections to convert values for https://zkregex.com/min_dfa // The input is a regex with [] and special characters (i.e. the first line of min_dfa tool) // The output is expanded regexes without any special characters function regexToMinDFASpec(str) { - // Replace all A-Z with A2Z etc - // TODO: Upstream this to min_dfa - let combined_nosep = str - .replaceAll("A-Z", A2Z_nosep) - .replaceAll("a-z", a2z_nosep) - .replaceAll("A-F", A2F_nosep) - .replaceAll("a-f", a2f_nosep) - .replaceAll("0-9", r0to9_nosep) - .replaceAll("\\w", A2Z_nosep + r0to9_nosep + a2z_nosep + "_") - .replaceAll("\\d", r0to9_nosep) - .replaceAll("\\s", slash_s); - - function addPipeInsideBrackets(str) { - let result = ""; - let insideBrackets = false; - for (let i = 0; i < str.length; i++) { - if (str[i] === "[") { - result += str[i]; - insideBrackets = true; - continue; - } else if (str[i] === "]") { - insideBrackets = false; - } - let str_to_add = str[i]; - if (str[i] === "\\") { - i++; - str_to_add += str[i]; - } - result += insideBrackets ? "|" + str_to_add : str_to_add; + // Replace all A-Z with A2Z etc + // TODO: Upstream this to min_dfa + let combined_nosep = str + .replaceAll('A-Z', A2Z_nosep) + .replaceAll('a-z', a2z_nosep) + .replaceAll('A-F', A2F_nosep) + .replaceAll('a-f', a2f_nosep) + .replaceAll('0-9', r0to9_nosep) + .replaceAll('\\w', A2Z_nosep + r0to9_nosep + a2z_nosep + '_') + .replaceAll('\\d', r0to9_nosep) + .replaceAll('\\s', slash_s); + + function addPipeInsideBrackets(str) { + let result = ''; + let insideBrackets = false; + for (let i = 0; i < str.length; i++) { + if (str[i] === '[') { + result += str[i]; + insideBrackets = true; + continue; + } else if (str[i] === ']') { + insideBrackets = false; + } + let str_to_add = str[i]; + if (str[i] === '\\') { + i++; + str_to_add += str[i]; + } + result += insideBrackets ? '|' + str_to_add : str_to_add; + } + return result.replaceAll('[|', '[').replaceAll('[', '(').replaceAll(']', ')'); } - return result.replaceAll("[|", "[").replaceAll("[", "(").replaceAll("]", ")"); - } - - // function makeCurlyBracesFallback(str) { - // let result = ""; - // let insideBrackets = false; - // for (let i = 0; i < str.length; i++) { - // if (str[i] === "{") { - // result += str[i]; - // insideBrackets = true; - // continue; - // } else if (str[i] === "}") { - // insideBrackets = false; - // } - // result += insideBrackets ? "|" + str[i] : str[i]; - // } - // return result.replaceAll("[|", "[").replaceAll("[", "(").replaceAll("]", ")"); - // } - - function checkIfBracketsHavePipes(str) { - let result = true; - let insideBrackets = false; - let insideParens = 0; - let indexAt = 0; - for (let i = 0; i < str.length; i++) { - if (indexAt >= str.length) break; - if (str[indexAt] === "[") { - insideBrackets = true; - indexAt++; - continue; - } else if (str[indexAt] === "]") { - insideBrackets = false; - } - if (str[indexAt] === "(") { - insideParens++; - } else if (str[indexAt] === ")") { - insideParens--; - } - if (insideBrackets) { - if (str[indexAt] === "|") { - indexAt++; - } else { - result = false; - return result; + + // function makeCurlyBracesFallback(str) { + // let result = ""; + // let insideBrackets = false; + // for (let i = 0; i < str.length; i++) { + // if (str[i] === "{") { + // result += str[i]; + // insideBrackets = true; + // continue; + // } else if (str[i] === "}") { + // insideBrackets = false; + // } + // result += insideBrackets ? "|" + str[i] : str[i]; + // } + // return result.replaceAll("[|", "[").replaceAll("[", "(").replaceAll("]", ")"); + // } + + function checkIfBracketsHavePipes(str) { + let result = true; + let insideBrackets = false; + let insideParens = 0; + let indexAt = 0; + for (let i = 0; i < str.length; i++) { + if (indexAt >= str.length) break; + if (str[indexAt] === '[') { + insideBrackets = true; + indexAt++; + continue; + } else if (str[indexAt] === ']') { + insideBrackets = false; + } + if (str[indexAt] === '(') { + insideParens++; + } else if (str[indexAt] === ')') { + insideParens--; + } + if (insideBrackets) { + if (str[indexAt] === '|') { + indexAt++; + } else { + result = false; + return result; + } + } + if (!insideParens && str[indexAt] === '|') { + console.log('Error: | outside of parens!'); + } + if (str[indexAt] === '\\') { + indexAt++; + } + indexAt++; } - } - if (!insideParens && str[indexAt] === "|") { - console.log("Error: | outside of parens!"); - } - if (str[indexAt] === "\\") { - indexAt++; - } - indexAt++; + return result; } - return result; - } - let combined; - if (!checkIfBracketsHavePipes(combined_nosep)) { + let combined; + if (!checkIfBracketsHavePipes(combined_nosep)) { // console.log("Adding pipes within brackets between everything!"); - combined = addPipeInsideBrackets(combined_nosep); - if (!checkIfBracketsHavePipes(combined)) { - console.log("Did not add brackets correctly!"); + combined = addPipeInsideBrackets(combined_nosep); + if (!checkIfBracketsHavePipes(combined)) { + console.log('Did not add brackets correctly!'); + } + } else { + combined = combined_nosep; } - } else { - combined = combined_nosep; - } - return combined; + return combined; } function toNature(col) { - var i, - j, - base = "ABCDEFGHIJKLMNOPQRSTUVWXYZ", - result = 0; - if ("1" <= col[0] && col[0] <= "9") { - result = parseInt(col, 10); - } else { - for (i = 0, j = col.length - 1; i < col.length; i += 1, j -= 1) { - result += Math.pow(base.length, j) * (base.indexOf(col[i]) + 1); + var i, + j, + base = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', + result = 0; + if ('1' <= col[0] && col[0] <= '9') { + result = parseInt(col, 10); + } else { + for (i = 0, j = col.length - 1; i < col.length; i += 1, j -= 1) { + result += Math.pow(base.length, j) * (base.indexOf(col[i]) + 1); + } } - } - return result; + return result; } function regexToGraph(regex) { - let nfa = regexToNfa(regex); - let dfa = minDfa(nfaToDfa(nfa)); - - var i, - j, - states = {}, - nodes = [], - stack = [dfa], - symbols = [], - top; - - while (stack.length > 0) { - top = stack.pop(); - if (!states.hasOwnProperty(top.id)) { - states[top.id] = top; - top.nature = toNature(top.id); - nodes.push(top); - for (i = 0; i < top.edges.length; i += 1) { - if (top.edges[i][0] !== "ϵ" && symbols.indexOf(top.edges[i][0]) < 0) { - symbols.push(top.edges[i][0]); + let nfa = regexToNfa(regex); + let dfa = minDfa(nfaToDfa(nfa)); + + var i, + states = {}, + nodes = [], + stack = [dfa], + symbols = [], + top; + + while (stack.length > 0) { + top = stack.pop(); + if (!Object.keys(states).includes(top.id)) { + states[top.id] = top; + top.nature = toNature(top.id); + nodes.push(top); + for (i = 0; i < top.edges.length; i += 1) { + if (top.edges[i][0] !== 'ϵ' && symbols.indexOf(top.edges[i][0]) < 0) { + symbols.push(top.edges[i][0]); + } + stack.push(top.edges[i][1]); + } } - stack.push(top.edges[i][1]); - } } - } - nodes.sort(function (a, b) { - return a.nature - b.nature; - }); - symbols.sort(); - - let graph = []; - for (let i = 0; i < nodes.length; i += 1) { - let curr = {}; - curr.type = nodes[i].type; - curr.edges = {}; - for (let j = 0; j < symbols.length; j += 1) { - if (nodes[i].trans.hasOwnProperty(symbols[j])) { - curr.edges[symbols[j]] = nodes[i].trans[symbols[j]].nature - 1; - } + nodes.sort(function (a, b) { + return a.nature - b.nature; + }); + symbols.sort(); + + let graph = []; + for (let i = 0; i < nodes.length; i += 1) { + let curr = {}; + curr.type = nodes[i].type; + curr.edges = {}; + for (let j = 0; j < symbols.length; j += 1) { + if (Object.keys(nodes[i].trans).includes(symbols[j])) { + curr.edges[symbols[j]] = nodes[i].trans[symbols[j]].nature - 1; + } + } + graph[nodes[i].nature - 1] = curr; } - graph[nodes[i].nature - 1] = curr; - } - return JSON.stringify(graph); + return JSON.stringify(graph); } -if (typeof require === "function") { - exports.regexToMinDFASpec = regexToMinDFASpec; - exports.toNature = toNature; - exports.regexToGraph = regexToGraph +if (typeof require === 'function') { + exports.regexToMinDFASpec = regexToMinDFASpec; + exports.toNature = toNature; + exports.regexToGraph = regexToGraph; }