Skip to content

Commit

Permalink
Add group and negate regex support.
Browse files Browse the repository at this point in the history
  • Loading branch information
SoraSuegami committed Sep 29, 2023
1 parent 98f5483 commit 70a1277
Show file tree
Hide file tree
Showing 6 changed files with 149 additions and 46 deletions.
6 changes: 3 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
# zk-regex

A library to do regex verification in circom, adapted from the original zk-email. Also spits out a halo2 lookup table for [halo2-regex](https://github.com/zkemail/halo2-regex) parsing. Note that you need to add valid accept states and substring definitions yourself. Releasing a tool to do this soon.
A library for regex verification in circom and in [halo2-regex](https://github.com/zkemail/halo2-regex).

We've forked [min-dfa into a UI here](https://mindfa.onrender.com/min_dfa) to create a UI that converts existing regexes with [] support, as well as escapes \_, and the character classes a-z, A-Z, and 0-9. It also shows the DFA states very clearly so you can choose accept states easily. This should make converting regexes into DFA form way cleaner.
<!-- We've forked [min-dfa into a UI here](https://mindfa.onrender.com/min_dfa) to create a UI that converts existing regexes with [] support, as well as escapes \_, and the character classes a-z, A-Z, and 0-9. It also shows the DFA states very clearly so you can choose accept states easily. This should make converting regexes into DFA form way cleaner. -->

## introduction
## Introduction

The compilation command generates a circom file at `build/compiled.circom`. This code is a JS adaptation of the Python regex-to-circom work done by [sampriti](https://github.com/sampritipanda/) and [yush_g](https://twitter.com/yush_g) at https://github.com/zk-email-verify/zk-email-verify/tree/main/regex_to_circom.

Expand Down
10 changes: 5 additions & 5 deletions packages/compiler/src/bin/compiler.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,10 @@ enum Commands {
decomposed_regex_path: String,
#[arg(short, long)]
halo2_dir_path: Option<String>,
#[arg(long)]
#[arg(short, long)]
circom_file_path: Option<String>,
#[arg(long)]
circom_template_name: Option<String>,
#[arg(short, long)]
template_name: Option<String>,
#[arg(short, long)]
gen_substrs: Option<bool>,
},
Expand Down Expand Up @@ -53,14 +53,14 @@ fn main() {
decomposed_regex_path,
halo2_dir_path,
circom_file_path,
circom_template_name,
template_name,
gen_substrs,
} => {
gen_from_decomposed(
&decomposed_regex_path,
halo2_dir_path.as_ref().map(|s| s.as_str()),
circom_file_path.as_ref().map(|s| s.as_str()),
circom_template_name.as_ref().map(|s| s.as_str()),
template_name.as_ref().map(|s| s.as_str()),
gen_substrs,
);
}
Expand Down
2 changes: 1 addition & 1 deletion packages/compiler/src/circom.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ impl RegexAndDFA {
template_name: &str,
gen_substrs: bool,
) -> Result<(), CompilerError> {
let all_regex = String::new();
// let all_regex = String::new();
let circom = gen_circom_allstr(&self.dfa_val, template_name)?;
if gen_substrs {
self.add_substrs_constraints(circom_path, circom)?;
Expand Down
59 changes: 46 additions & 13 deletions packages/compiler/src/gen_circom.js
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,9 @@ function genCircomAllstr(graph_json, template_name) {
for (let k in graph_json[i]["edges"]) {
const v = graph_json[i]["edges"][k];
rev_graph[v][i] = Array.from(JSON.parse(k)).map(c => c.charCodeAt());
if (i == 0) {
if (i === 0) {
const index = rev_graph[v][i].indexOf(94);
if (index != -1) {
if (index !== -1) {
init_going_state = v;
rev_graph[v][i][index] = 128;
}
Expand All @@ -31,9 +31,9 @@ function genCircomAllstr(graph_json, template_name) {
accept_nodes.add(i);
}
}
if (init_going_state != null) {
if (init_going_state !== null) {
for (const [going_state, chars] of Object.entries(to_init_graph)) {
if (chars.length == 0) {
if (chars.length === 0) {
continue;
}
if (rev_graph[going_state][init_going_state] == null) {
Expand All @@ -43,11 +43,11 @@ function genCircomAllstr(graph_json, template_name) {
}
}

if (accept_nodes[0] != null) {
if (accept_nodes[0] === null) {
throw new Error("accept node must not be 0");
}
accept_nodes = [...accept_nodes];
if (accept_nodes.length != 1) {
if (accept_nodes.length !== 1) {
throw new Error("the size of accept nodes must be one");
}

Expand All @@ -68,13 +68,38 @@ function genCircomAllstr(graph_json, template_name) {
lines.push(`\t\tstate_changed[i] = MultiOR(${N - 1});`);
for (let i = 1; i < N; i++) {
const outputs = [];
let is_negates = [];
for (let prev_i of Object.keys(rev_graph[i])) {
const k = rev_graph[i][prev_i];
const eq_outputs = [];
const vals = new Set(k);
if (vals.size == 0) {
let vals = new Set(k);
// let is_negate = false;
if (vals.has(0xff)) {
vals.delete(0xff);
is_negates.push(true);
} else {
is_negates.push(false);
}
if (vals.size === 0) {
continue;
}
if (is_negates[is_negates.length - 1] === true) {
for (let another_i = 1; another_i < N; another_i++) {
if (i === another_i) {
continue;
}
if (rev_graph[another_i][prev_i] === null) {
continue;
}
const another_vals = new Set(rev_graph[another_i][prev_i]);
if (another_vals.size === 0) {
continue;
}
for (let another_val of another_vals) {
vals.add(another_val);
}
}
}
const min_maxs = [];
for (let subsets of [
[digits, 47, 58],
Expand Down Expand Up @@ -130,7 +155,7 @@ function genCircomAllstr(graph_json, template_name) {

lines.push(`\t\tand[${and_i}][i] = AND();`);
lines.push(`\t\tand[${and_i}][i].a <== states[i][${prev_i}];`);
if (eq_outputs.length == 1) {
if (eq_outputs.length === 1) {
lines.push(`\t\tand[${and_i}][i].b <== ${eq_outputs[0][0]}[${eq_outputs[0][1]}][i].out;`);
} else if (eq_outputs.length > 1) {
lines.push(`\t\tmulti_or[${multi_or_i}][i] = MultiOR(${eq_outputs.length});`);
Expand All @@ -145,12 +170,20 @@ function genCircomAllstr(graph_json, template_name) {
and_i += 1;
}

if (outputs.length == 1) {
lines.push(`\t\tstates[i+1][${i}] <== and[${outputs[0]}][i].out;`);
if (outputs.length === 1) {
if (is_negates[0]) {
lines.push(`\t\tstates[i+1][${i}] <== 1 - and[${outputs[0]}][i].out;`);
} else {
lines.push(`\t\tstates[i+1][${i}] <== and[${outputs[0]}][i].out;`);
}
} else if (outputs.length > 1) {
lines.push(`\t\tmulti_or[${multi_or_i}][i] = MultiOR(${outputs.length});`);
for (let output_i = 0; output_i < outputs.length; output_i++) {
lines.push(`\t\tmulti_or[${multi_or_i}][i].in[${output_i}] <== and[${outputs[output_i]}][i].out;`);
if (is_negates[output_i]) {
lines.push(`\t\tmulti_or[${multi_or_i}][i].in[${output_i}] <== 1 - and[${outputs[output_i]}][i].out;`);
} else {
lines.push(`\t\tmulti_or[${multi_or_i}][i].in[${output_i}] <== and[${outputs[output_i]}][i].out;`);
}
}
lines.push(`\t\tstates[i+1][${i}] <== multi_or[${multi_or_i}][i].out;`);
multi_or_i += 1
Expand Down Expand Up @@ -215,7 +248,7 @@ function genCircomAllstr(graph_json, template_name) {


Set.prototype.isSuperset = function (subset) {
if (this.size == 0) {
if (this.size === 0) {
return false;
}
for (var elem of subset) {
Expand Down
43 changes: 26 additions & 17 deletions packages/compiler/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ pub enum SoldityType {
#[derive(Debug, Clone)]
pub struct RegexAndDFA {
pub max_byte_size: usize,
pub all_regex: String,
// pub all_regex: String,
pub dfa_val: Vec<Value>,
pub substrs_defs: SubstrsDefs,
}
Expand Down Expand Up @@ -92,15 +92,15 @@ impl DecomposedRegexConfig {
let substrs_defs = self.extract_substr_ids(&dfa_val)?;
Ok(RegexAndDFA {
max_byte_size: self.max_byte_size,
all_regex,
// all_regex,
dfa_val,
substrs_defs,
})
}

pub fn extract_substr_ids(&self, dfa_val: &[Value]) -> Result<SubstrsDefs, CompilerError> {
let part_configs = &self.parts;
let mut graph = Graph::<bool, String, Directed, usize>::with_capacity(0, 0);
let mut graph = Graph::<bool, char, Directed, usize>::with_capacity(0, 0);
let max_state = get_max_state(dfa_val)?;
add_graph_nodes(dfa_val, &mut graph, None, max_state)?;
let accepted_state = get_accepted_state(dfa_val).ok_or(JsCallerError::NoAcceptedState)?;
Expand All @@ -113,7 +113,7 @@ impl DecomposedRegexConfig {
for state in 0..=max_state {
let node = NodeIndex::from(state);
if let Some(edge) = graph.find_edge(node, node) {
let str = graph.edge_weight(edge).unwrap().as_str();
let str = graph.edge_weight(edge).unwrap().to_string();
let bytes = str.as_bytes();
self_nodes_char.insert(node.index(), bytes[0]);
}
Expand Down Expand Up @@ -144,12 +144,15 @@ impl DecomposedRegexConfig {
if config.is_public {
public_config_indexes.push(idx);
}
let this_regex = config
.regex_def
.replace("^", "\\^")
.replace("[^", "[\u{ff}");
if idx == 0 {
let regex_def = config.regex_def.replace("^", "\\^");
part_regexes.push(Regex::new(&regex_def)?);
part_regexes.push(Regex::new(&this_regex)?);
} else {
let pre_regex = part_regexes[idx - 1].to_string();
part_regexes.push(Regex::new(&(pre_regex + &config.regex_def))?);
part_regexes.push(Regex::new(&(pre_regex + &this_regex))?);
}
}
let num_public_parts = public_config_indexes.len();
Expand All @@ -171,8 +174,8 @@ impl DecomposedRegexConfig {
.collect::<Result<Vec<EdgeIndex<usize>>, CompilerError>>()?;
let string_vec = edges
.iter()
.map(|edge| graph.edge_weight(*edge).unwrap().as_str())
.collect::<Vec<&str>>();
.map(|edge| graph.edge_weight(*edge).unwrap().to_string())
.collect::<Vec<String>>();
let path_states = path
.into_iter()
.rev()
Expand Down Expand Up @@ -299,7 +302,7 @@ impl RegexAndDFA {

Ok(RegexAndDFA {
max_byte_size,
all_regex: regex_str.to_string(),
// all_regex: regex_str.to_string(),
dfa_val,
substrs_defs,
})
Expand Down Expand Up @@ -420,7 +423,7 @@ pub(crate) fn get_max_state(dfa_val: &[Value]) -> Result<usize, JsCallerError> {

pub(crate) fn add_graph_nodes(
dfa_val: &[Value],
graph: &mut Graph<bool, String, Directed, usize>,
graph: &mut Graph<bool, char, Directed, usize>,
last_max_state: Option<usize>,
next_max_state: usize,
) -> Result<(), JsCallerError> {
Expand Down Expand Up @@ -448,12 +451,18 @@ pub(crate) fn add_graph_nodes(
}
}
let key_list: Vec<String> = serde_json::from_str(&key)?;
let mut key_str = String::new();
for key_char in key_list.iter() {
assert!(key_char.len() == 1);
key_str += key_char;
}
graph.add_edge(NodeIndex::from(next_node), NodeIndex::from(i), key_str);
// let mut key_str = String::new();
// for key_char in key_list.iter() {
// // println!("key_char {}", key_char);
// assert!(key_char.len() == 1);
// // key_str += key_char;
// }
assert_eq!(key_list[0].as_bytes().len(), 1);
graph.add_edge(
NodeIndex::from(next_node),
NodeIndex::from(i),
key_list[0].as_bytes()[0] as char,
);
}
}
Ok(())
Expand Down
Loading

0 comments on commit 70a1277

Please sign in to comment.