Skip to content

Commit

Permalink
Support both native rust and wasm.
Browse files Browse the repository at this point in the history
  • Loading branch information
SoraSuegami committed Mar 24, 2024
1 parent d1ef521 commit f096f6a
Show file tree
Hide file tree
Showing 23 changed files with 346 additions and 259 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/publish.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ jobs:
build:
strategy:
matrix:
os: [ubuntu-latest, windows-latest, macos-latest, macos-14]
os: [ubuntu-latest]
runs-on: ${{ matrix.os }}
steps:
- name: Checkout code
Expand Down
2 changes: 1 addition & 1 deletion packages/apis/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "zk-regex-apis"
version = "2.0.0"
version = "2.0.1"
license = "MIT"
edition = "2018"
authors = [
Expand Down
6 changes: 3 additions & 3 deletions packages/apis/package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "@zk-email/zk-regex-apis",
"version": "2.0.0",
"version": "2.0.1",
"description": "apis compatible with [zk-regex](https://github.com/zkemail/zk-regex/tree/main).",
"contributors": [
"Javier Su <javier.su.weijie@gmail.com>",
Expand All @@ -19,8 +19,8 @@
"build-release": "npm run build --",
"install": "npm run build-debug",
"install-release": "npm run build-release",
"test": "wasm-pack test --node",
"test": "cargo test && wasm-pack test --node",
"upload-binary": "wasm-pack publish -t nodejs"
},
"license": "MIT"
}
}
190 changes: 86 additions & 104 deletions packages/apis/src/extract_substrs.rs
Original file line number Diff line number Diff line change
@@ -1,11 +1,7 @@
use fancy_regex::Regex;
use js_sys::Array;
use serde::{Deserialize, Serialize};

use serde_wasm_bindgen::from_value;
use thiserror::Error;
use wasm_bindgen::prelude::*;
use wasm_bindgen_test::*;

/// A configuration of decomposed regexes.
#[derive(Debug, Clone, Serialize, Deserialize)]
Expand Down Expand Up @@ -39,174 +35,161 @@ pub enum ExtractSubstrssError {
RegexError(#[from] fancy_regex::Error),
}

#[wasm_bindgen]
pub fn extract_substr_idxes(input_str: &str, regex_config: JsValue) -> Array {
let regex_config_str = regex_config.as_string().unwrap();
let regex_config: DecomposedRegexConfig = serde_json::from_str(&regex_config_str).unwrap();
pub fn extract_substr_idxes(
input_str: &str,
regex_config: &DecomposedRegexConfig,
) -> Result<Vec<(usize, usize)>, ExtractSubstrssError> {
let mut entire_regex_str = String::new();
for part in regex_config.parts.iter() {
entire_regex_str += part.regex_def.as_str();
}

let entire_regex = Regex::new(&entire_regex_str).unwrap();
let entire_found = entire_regex
.find(input_str)
.unwrap()
.ok_or_else(|| {
ExtractSubstrssError::SubstringOfEntireNotFound(entire_regex, input_str.to_string())
})
.unwrap();
let entire_regex = Regex::new(&entire_regex_str)?;
let entire_found = entire_regex.find(input_str)?.ok_or_else(|| {
ExtractSubstrssError::SubstringOfEntireNotFound(entire_regex, input_str.to_string())
})?;
let mut start = entire_found.start();
let entire_end = entire_found.end();

let mut public_idxes = vec![];

for part_idx in 0..regex_config.parts.len() {
// last_regex_str = last_regex_str + regex_config.parts[part_idx].regex_def.as_str();
let regex = Regex::new(&regex_config.parts[part_idx].regex_def.as_str()).unwrap();
let found = regex
.find_from_pos(&input_str, start)
.unwrap()
.ok_or_else(|| {
ExtractSubstrssError::SubstringNotFound(
regex.clone(),
input_str[start..entire_end].to_string(),
)
})
.unwrap();
let regex = Regex::new(&regex_config.parts[part_idx].regex_def.as_str())?;
let found = regex.find_from_pos(&input_str, start)?.ok_or_else(|| {
ExtractSubstrssError::SubstringNotFound(
regex.clone(),
input_str[start..entire_end].to_string(),
)
})?;
let end = found.end();

if regex_config.parts[part_idx].is_public {
public_idxes.push((start, end));
}
start = end;
}

let arr = Array::new_with_length(public_idxes.len() as u32);
for (i, idx) in public_idxes.iter().enumerate() {
let js_arr = Array::new_with_length(2);
js_arr.set(0, JsValue::from(idx.0 as u32));
js_arr.set(1, JsValue::from(idx.1 as u32));
arr.set(i as u32, JsValue::from(js_arr));
}

arr
Ok(public_idxes)
}

#[wasm_bindgen]
pub fn extract_email_addr_idxes(input_str: &str) -> Array {
pub fn extract_email_addr_idxes(
input_str: &str,
) -> Result<Vec<(usize, usize)>, ExtractSubstrssError> {
let regex_config = include_str!("./decomposed_defs/email_addr.json");
extract_substr_idxes(input_str, JsValue::from_str(regex_config))
extract_substr_idxes(input_str, &serde_json::from_str(regex_config).unwrap())
}

#[wasm_bindgen]
pub fn extract_email_domain_idxes(input_str: &str) -> Array {
pub fn extract_email_domain_idxes(
input_str: &str,
) -> Result<Vec<(usize, usize)>, ExtractSubstrssError> {
let regex_config = include_str!("./decomposed_defs/email_domain.json");
extract_substr_idxes(input_str, JsValue::from_str(regex_config))
extract_substr_idxes(input_str, &serde_json::from_str(regex_config).unwrap())
}

#[wasm_bindgen]
pub fn extract_email_addr_with_name_idxes(input_str: &str) -> Array {
pub fn extract_email_addr_with_name_idxes(
input_str: &str,
) -> Result<Vec<(usize, usize)>, ExtractSubstrssError> {
let regex_config = include_str!("./decomposed_defs/email_addr_with_name.json");
extract_substr_idxes(input_str, JsValue::from_str(regex_config))
extract_substr_idxes(input_str, &serde_json::from_str(regex_config).unwrap())
}

#[wasm_bindgen]
pub fn extract_from_all_idxes(input_str: &str) -> Array {
pub fn extract_from_all_idxes(
input_str: &str,
) -> Result<Vec<(usize, usize)>, ExtractSubstrssError> {
let regex_config = include_str!("./decomposed_defs/from_all.json");
extract_substr_idxes(input_str, JsValue::from_str(regex_config))
extract_substr_idxes(input_str, &serde_json::from_str(regex_config).unwrap())
}

#[wasm_bindgen]
pub fn extract_from_addr_idxes(input_str: &str) -> Array {
pub fn extract_from_addr_idxes(
input_str: &str,
) -> Result<Vec<(usize, usize)>, ExtractSubstrssError> {
let regex_config = include_str!("./decomposed_defs/from_addr.json");
extract_substr_idxes(input_str, JsValue::from_str(regex_config))
extract_substr_idxes(input_str, &serde_json::from_str(regex_config).unwrap())
}

#[wasm_bindgen]
pub fn extract_to_all_idxes(input_str: &str) -> Array {
pub fn extract_to_all_idxes(input_str: &str) -> Result<Vec<(usize, usize)>, ExtractSubstrssError> {
let regex_config = include_str!("./decomposed_defs/to_all.json");
extract_substr_idxes(input_str, JsValue::from_str(regex_config))
extract_substr_idxes(input_str, &serde_json::from_str(regex_config).unwrap())
}

#[wasm_bindgen]
pub fn extract_to_addr_idxes(input_str: &str) -> Array {
pub fn extract_to_addr_idxes(input_str: &str) -> Result<Vec<(usize, usize)>, ExtractSubstrssError> {
let regex_config = include_str!("./decomposed_defs/to_addr.json");
extract_substr_idxes(input_str, JsValue::from_str(regex_config))
extract_substr_idxes(input_str, &serde_json::from_str(regex_config).unwrap())
}

#[wasm_bindgen]
pub fn extract_subject_all_idxes(input_str: &str) -> Array {
pub fn extract_subject_all_idxes(
input_str: &str,
) -> Result<Vec<(usize, usize)>, ExtractSubstrssError> {
let regex_config = include_str!("./decomposed_defs/subject_all.json");
extract_substr_idxes(input_str, JsValue::from_str(regex_config))
extract_substr_idxes(input_str, &serde_json::from_str(regex_config).unwrap())
}

#[wasm_bindgen]
pub fn extract_body_hash_idxes(input_str: &str) -> Array {
pub fn extract_body_hash_idxes(
input_str: &str,
) -> Result<Vec<(usize, usize)>, ExtractSubstrssError> {
let regex_config = include_str!("./decomposed_defs/body_hash.json");
extract_substr_idxes(input_str, JsValue::from_str(regex_config))
extract_substr_idxes(input_str, &serde_json::from_str(regex_config).unwrap())
}

#[wasm_bindgen]
pub fn extract_timestamp_idxes(input_str: &str) -> Array {
pub fn extract_timestamp_idxes(
input_str: &str,
) -> Result<Vec<(usize, usize)>, ExtractSubstrssError> {
let regex_config = include_str!("./decomposed_defs/timestamp.json");
extract_substr_idxes(input_str, JsValue::from_str(regex_config))
extract_substr_idxes(input_str, &serde_json::from_str(regex_config).unwrap())
}

#[wasm_bindgen]
pub fn extract_message_id_idxes(input_str: &str) -> Array {
pub fn extract_message_id_idxes(
input_str: &str,
) -> Result<Vec<(usize, usize)>, ExtractSubstrssError> {
let regex_config = include_str!("./decomposed_defs/message_id.json");
extract_substr_idxes(input_str, JsValue::from_str(regex_config))
extract_substr_idxes(input_str, &serde_json::from_str(regex_config).unwrap())
}

#[cfg(test)]
mod test {
use super::*;

#[wasm_bindgen_test]
#[test]
fn test_email_domain_valid() {
let input_str = "suegamisora@gmail.com";
let idxes = extract_email_domain_idxes(input_str);
assert_eq!(from_value::<Vec<u8>>(idxes.at(0)).unwrap(), vec![12, 21]);
let idxes = extract_email_domain_idxes(input_str).unwrap();
assert_eq!(idxes, vec![(12, 21)]);
}

#[wasm_bindgen_test]
#[test]
fn test_email_addr_in_subject_valid() {
let input_str = "This is sent for suegamisora@gmail.com";
let idxes = extract_email_addr_idxes(input_str);
assert_eq!(from_value::<Vec<u8>>(idxes.at(0)).unwrap(), vec![17, 38]);
let idxes = extract_email_addr_idxes(input_str).unwrap();
assert_eq!(idxes, vec![(17, 38)]);
}

#[wasm_bindgen_test]
#[test]
fn test_email_addr_with_name_valid1() {
let input_str = "from:dummy@a.com <suegamisora@gmail.com>";
let idxes = extract_email_addr_with_name_idxes(input_str);
assert_eq!(from_value::<Vec<u8>>(idxes.at(0)).unwrap(), vec![18, 39]);
let idxes = extract_email_addr_with_name_idxes(input_str).unwrap();
assert_eq!(idxes, vec![(18, 39)]);
}

#[wasm_bindgen_test]
#[test]
fn test_email_addr_with_name_valid2() {
// "末神 奏宙" has 13 bytes.
let input_str = "from:\"末神 奏宙\" <suegamisora@gmail.com>";
let idxes = extract_email_addr_with_name_idxes(input_str);
assert_eq!(from_value::<Vec<u8>>(idxes.at(0)).unwrap(), vec![22, 43]);
let idxes = extract_email_addr_with_name_idxes(input_str).unwrap();
assert_eq!(idxes, vec![(22, 43)]);
}

#[wasm_bindgen_test]
#[test]
fn test_email_from_all_valid() {
let input_str = "from:dummy@a.com <suegamisora@gmail.com>\r\n";
let idxes = extract_from_all_idxes(input_str);
assert_eq!(from_value::<Vec<u8>>(idxes.at(0)).unwrap(), vec![5, 40]);
let idxes = extract_from_all_idxes(input_str).unwrap();
assert_eq!(idxes, vec![(5, 40)]);
}

#[wasm_bindgen_test]
#[test]
fn test_email_from_addr_valid() {
let input_str = "from:dummy@a.com <suegamisora@gmail.com>\r\n";
let idxes = extract_from_addr_idxes(input_str);
assert_eq!(from_value::<Vec<u8>>(idxes.at(0)).unwrap(), vec![18, 39]);
let idxes = extract_from_addr_idxes(input_str).unwrap();
assert_eq!(idxes, vec![(18, 39)]);
}

#[wasm_bindgen_test]
#[test]
fn test_code_in_email_address_valid() {
let code_regex = DecomposedRegexConfig {
// max_byte_size: 1024,
Expand All @@ -226,30 +209,29 @@ mod test {
],
};
let input_str = "sepolia+ACCOUNTKEY.0xabc123@sendeth.org";
let code_regex = JsValue::from_str(&serde_json::to_string(&code_regex).unwrap());
let idxes = extract_substr_idxes(input_str, code_regex);
assert_eq!(from_value::<Vec<u8>>(idxes.at(0)).unwrap(), vec![21, 27]);
let idxes = extract_substr_idxes(input_str, &code_regex).unwrap();
assert_eq!(idxes, vec![(21, 27)]);
}

#[wasm_bindgen_test]
#[test]
fn test_body_hash_valid() {
let input_str = "dkim-signature:v=1; a=rsa-sha256; c=relaxed/relaxed; d=gmail.com; s=20230601; t=1694989812; x=1695594612; dara=google.com; h=to:subject:message-id:date:from:mime-version:from:to:cc:subject :date:message-id:reply-to; bh=BWETwQ9JDReS4GyR2v2TTR8Bpzj9ayumsWQJ3q7vehs=; b=";
let idxes = extract_body_hash_idxes(input_str);
assert_eq!(from_value::<Vec<u32>>(idxes.at(0)).unwrap(), vec![219, 263]);
let idxes = extract_body_hash_idxes(input_str).unwrap();
assert_eq!(idxes, vec![(219, 263)]);
}

#[wasm_bindgen_test]
#[test]
fn test_timestamp_valid() {
let input_str = "dkim-signature:v=1; a=rsa-sha256; c=relaxed/relaxed; d=gmail.com; s=20230601; t=1694989812; x=1695594612; dara=google.com; h=to:subject:message-id:date:from:mime-version:from:to:cc:subject :date:message-id:reply-to; bh=BWETwQ9JDReS4GyR2v2TTR8Bpzj9ayumsWQJ3q7vehs=; b=";
let idxes = extract_timestamp_idxes(input_str);
assert_eq!(from_value::<Vec<u8>>(idxes.at(0)).unwrap(), vec![80, 90]);
let idxes = extract_timestamp_idxes(input_str).unwrap();
assert_eq!(idxes, vec![(80, 90)]);
}

#[wasm_bindgen_test]
#[test]
fn test_message_id_valid() {
let input_str =
"message-id:<CAJ7Y6jerCWt6t4HVqfXeeqRthJpj_1vYCpXzAVgowozVFKWbVQ@mail.gmail.com>\r\n";
let idxes = extract_message_id_idxes(input_str);
assert_eq!(from_value::<Vec<u8>>(idxes.at(0)).unwrap(), vec![11, 79]);
let idxes = extract_message_id_idxes(input_str).unwrap();
assert_eq!(idxes, vec![(11, 79)]);
}
}
4 changes: 4 additions & 0 deletions packages/apis/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,2 +1,6 @@
pub mod extract_substrs;
pub mod padding;
#[cfg(target_arch = "wasm32")]
mod wasm;
#[cfg(target_arch = "wasm32")]
pub use crate::wasm::*;
14 changes: 2 additions & 12 deletions packages/apis/src/padding.rs
Original file line number Diff line number Diff line change
@@ -1,15 +1,5 @@
use js_sys::Array;
use wasm_bindgen::prelude::*;

/// Returns the UTF-8 bytes of `str`, right-padded with zero bytes up to `padded_bytes_size`.
///
/// If `str` already occupies `padded_bytes_size` bytes or more, its bytes are returned as they
/// are: nothing is truncated and no padding is appended.
pub fn pad_string(str: &str, padded_bytes_size: usize) -> Vec<u8> {
    let mut padded_bytes = str.as_bytes().to_vec();
    // Computing `padded_bytes_size - padded_bytes.len()` underflows when the input is longer
    // than the target, so only grow the buffer when there is room left to pad.
    if padded_bytes.len() < padded_bytes_size {
        padded_bytes.resize(padded_bytes_size, 0);
    }
    padded_bytes
}
Loading

0 comments on commit f096f6a

Please sign in to comment.