Skip to content

Commit

Permalink
Updated article again!
Browse files Browse the repository at this point in the history
  • Loading branch information
cliftonm committed Apr 11, 2015
1 parent 3aef80c commit 8b853f1
Showing 1 changed file with 288 additions and 6 deletions.
294 changes: 288 additions & 6 deletions articles/Semtrex Intro/semtrex intro.htm
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ <h3>Meaning: Symbols and Structures</h3>
specific context.&nbsp; A context means a collection of symbols used in a
particular way.&quot;</p>
<p>For example, here is an ottoman used as a table, a footstool, and a chair.&nbsp; </p>
<p><img border="0" src="ott1.png" width="256" height="455"><img border="0" src="ott2.png" width="256" height="455"><img border="0" src="ott3.png" width="455" height="256"></p>
<p><img border="0" src="ott1.png" width="256" height="455"></p><p><img border="0" src="ott2.png" width="256" height="455"></p><p><img border="0" src="ott3.png" width="455" height="256"></p>
<p align="center">(Arthur Brock working, relaxing, and playing chess with Eric
Harris-Braun)</p>
<p>The context provides essential additional meaning to the symbol &quot;ottoman.&quot;</p>
Expand Down Expand Up @@ -811,19 +811,301 @@ <h4>Matching Against the Embodied Semtrex</h4>
}
</pre>
<h2>On the 'C' Side</h2>
<p>As mentioned in the introduction, Eric implemented the actual algorithms in C
for portability and performance.&nbsp; He's also written a large suite of unit
tests which we'll give an example of as well.</p>
<h3>Semtrex Parsing</h3>
<p>Some of the Semtrex parsing was inspired by Russ Cox' article
<a href="https://web.archive.org/web/20150218105004/http:/swtch.com/~rsc/regexp/regexp1.html">
Regular Expression Matching Can Be Simple and Fast</a>, and in particular the
algorithm for flattening a nondeterministic finite automaton (NFA) to a deterministic one.</p>
<p>&nbsp;</p>
algorithm for flattening a nondeterministic finite automaton (NFA) to a deterministic one.&nbsp;
Here, for example, is the C code that creates the finite automaton:</p>
<pre>/**
 * Given a Semtrex tree, recursively build a partial FSA for it.
 *
 * The fragment is handed back through the out-parameters: a pointer to its
 * starting state, a list of its still-dangling out pointers, and (via statesP)
 * the running count of the total number of states created.
 *
 * @param t        the semtrex node to compile
 * @param in       receives the starting state of the fragment
 * @param out      receives the list of dangling out pointers that the caller must patch
 * @param level    stored as the transition of states that have no child semtrex;
 *                 each descent into a child semtrex recurses with level-1
 * @param statesP  handed to state() for every state created
 * @returns 0 on success, otherwise a static string describing the malformed semtrex
 *          (a missing or wrongly typed first child of a literal is reported via
 *          raise_error0 instead of the return value)
 */
char * __stx_makeFA(T *t,SState **in,Ptrlist **out,int level,int *statesP) {
    SState *s,*i,*last,*s1;
    Ptrlist *o,*o1;
    char *err;
    int state_type = -1;
    int x;
    SemanticID group_symbol;
    int group_id;
    T *v;

    int c = _t_children(t);
    Symbol sym = _t_symbol(t);
    switch(sym.id) {
    case SEMTREX_VALUE_LITERAL_ID:
    case SEMTREX_VALUE_LITERAL_NOT_ID:
        state_type = StateValue;
        s = state(state_type,statesP);
        s-&gt;data.value.flags = (sym.id == SEMTREX_VALUE_LITERAL_NOT_ID) ? LITERAL_NOT : 0;
        // copy the value set (which must be the first child) from the semtrex into the state
        v = _t_child(t,1);
        if (!v) {
            raise_error0(&quot;expecting value or SEMTREX_VALUE_SET as first child of SEMTREX_VALUE_LITERAL&quot;);
        }
        if (semeq(_t_symbol(v),SEMTREX_VALUE_SET)) s-&gt;data.value.flags |= LITERAL_SET;

        s-&gt;data.value.values = _t_clone(v);
        *in = s;
        s-&gt;transition = level;
        *out = list1(&amp;s-&gt;out);
        break;

    case SEMTREX_SYMBOL_LITERAL_ID:
    case SEMTREX_SYMBOL_LITERAL_NOT_ID:
        state_type = StateSymbol;

        v = _t_child(t,1);
        int is_set = 0;
        int is_symbol = 0;
        // look at the first child's symbol only once we know the child exists;
        // a missing child falls through to the same error as a wrongly typed one
        if (v) {
            Symbol vsym = _t_symbol(v);
            is_set = semeq(SEMTREX_SYMBOL_SET,vsym);
            is_symbol = is_set || semeq(SEMTREX_SYMBOL,vsym);
        }
        if (!is_symbol) {
            raise_error0(&quot;expecting SEMTREX_SYMBOL_SET or SEMTREX_SYMBOL as first child of SEMTREX_SYMBOL_LITERAL&quot;);
        }
        if (c &gt; 2) return &quot;Symbol literal must have 0 or 1 children other than the symbol/set&quot;;
        s = state(state_type,statesP);
        s-&gt;data.symbol.flags = (sym.id == SEMTREX_SYMBOL_LITERAL_NOT_ID) ? LITERAL_NOT : 0;
        if (is_set) s-&gt;data.symbol.flags |= LITERAL_SET;
        s-&gt;data.symbol.symbols = _t_clone(v);
        *in = s;
        if (c &gt; 1) {
            // the second child is a semtrex to be matched one level down
            s-&gt;transition = TransitionDown;
            err = __stx_makeFA(_t_child(t,2),&amp;i,&amp;o,level-1,statesP);
            if (err) return err;
            s-&gt;out = i;
            *out = o;
        }
        else {
            s-&gt;transition = level;
            *out = list1(&amp;s-&gt;out);
        }
        break;

    case SEMTREX_SYMBOL_ANY_ID:
        state_type = StateAny;
        if (c &gt; 1) return &quot;Symbol any must have 0 or 1 children&quot;;

        s = state(state_type,statesP);

        *in = s;
        if (c &gt; 0) {
            s-&gt;transition = TransitionDown;
            err = __stx_makeFA(_t_child(t,1),&amp;i,&amp;o,level-1,statesP);
            if (err) return err;
            s-&gt;out = i;
            *out = o;
        }
        else {
            s-&gt;transition = level;
            *out = list1(&amp;s-&gt;out);
        }
        break;

    case SEMTREX_SEQUENCE_ID:
        if (c == 0) return &quot;Sequence must have children&quot;;
        last = 0;
        // walk the children from last to first so that each fragment's dangling
        // outs can be patched to the start of the fragment built just before it
        for(x=c;x&gt;=1;x--) {
            err = __stx_makeFA(_t_child(t,x),&amp;i,&amp;o,level,statesP);
            if (err) return err;

            if (last) patch(o,last,level);
            else *out = o;  // the last child's outs are the outs of the whole sequence
            last = i;
            *in = i;
        }
        break;

    case SEMTREX_OR_ID:
        if (c != 2) return &quot;Or must have 2 children&quot;;
        s = state(StateSplit,statesP);
        *in = s;
        err = __stx_makeFA(_t_child(t,1),&amp;i,&amp;o,level,statesP);
        if (err) return err;
        s-&gt;out = i;
        err = __stx_makeFA(_t_child(t,2),&amp;i,&amp;o1,level,statesP);
        if (err) return err;
        s-&gt;out1 = i;
        *out = append(o,o1);
        break;

    case SEMTREX_ZERO_OR_MORE_ID:
        if (c != 1) return &quot;Star must have 1 child&quot;;
        s = state(StateSplit,statesP);
        *in = s;
        err = __stx_makeFA(_t_child(t,1),&amp;i,&amp;o,level,statesP);
        if (err) return err;
        s-&gt;out = i;
        // loop the child's outs back to the split; out1 is the exit
        patch(o,s,level);
        *out = list1(&amp;s-&gt;out1);
        break;

    case SEMTREX_ONE_OR_MORE_ID:
        if (c != 1) return &quot;Plus must have 1 child&quot;;
        s = state(StateSplit,statesP);
        err = __stx_makeFA(_t_child(t,1),&amp;i,&amp;o,level,statesP);
        if (err) return err;
        // unlike star, the fragment starts at the child, so it must match at least once
        *in = i;
        s-&gt;out = i;
        patch(o,s,level);
        *out = list1(&amp;s-&gt;out1);
        break;

    case SEMTREX_ZERO_OR_ONE_ID:
        if (c != 1) return &quot;Question must have 1 child&quot;;
        s = state(StateSplit,statesP);
        *in = s;
        err = __stx_makeFA(_t_child(t,1),&amp;i,&amp;o,level,statesP);
        if (err) return err;
        s-&gt;out = i;
        *out = append(o,list1(&amp;s-&gt;out1));
        break;

    case SEMTREX_GROUP_ID:
        if (c != 1) return &quot;Group must have 1 child&quot;;
        s = state(StateGroupOpen,statesP);
        *in = s;
        group_symbol = *(SemanticID *)_t_surface(t);
        group_id = ++G_group_id;
        s-&gt;data.groupo.symbol = group_symbol;
        s-&gt;data.groupo.uid = group_id;
        err = __stx_makeFA(_t_child(t,1),&amp;i,&amp;o,level,statesP);
        if (err) return err;
        s-&gt;out = i;
        // bracket the child fragment with a close state that points back at its open state
        s1 = state(StateGroupClose,statesP);
        patch(o,s1,level);
        s1-&gt;data.groupc.openP = s;
        *out = list1(&amp;s1-&gt;out);
        break;

    case SEMTREX_DESCEND_ID:
        if (c != 1) return &quot;Descend must have 1 child&quot;;
        s = state(StateDescend,statesP);
        *in = s;
        err = __stx_makeFA(_t_child(t,1),&amp;i,&amp;o,level-1,statesP);
        if (err) return err;
        s-&gt;out = i;
        *out = o;
        break;

    case SEMTREX_NOT_ID:
        if (c != 1) return &quot;Not must have 1 child&quot;;
        s = state(StateNot,statesP);
        *in = s;
        err = __stx_makeFA(_t_child(t,1),&amp;i,&amp;o,level,statesP);
        if (err) return err;
        s-&gt;out = i;
        *out = append(o,list1(&amp;s-&gt;out1));
        break;

    case SEMTREX_WALK_ID:
        if (c != 1) return &quot;Walk must have 1 child&quot;;
        s = state(StateWalk,statesP);
        *in = s;
        err = __stx_makeFA(_t_child(t,1),&amp;i,&amp;o,level,statesP);
        if (err) return err;
        s-&gt;out = i;
        *out = o;
        break;

    default:
        return &quot;Unknown SEMTREX SYMBOL&quot;;
    }
    return 0;
}

/**
 * Entry point for FSA construction: resets the group id counter, recursively
 * builds the automaton for the semtrex t starting at level 0, and patches every
 * dangling out pointer of the result to the final match state.
 *
 * @param t        root of the semtrex tree
 * @param statesP  handed down to the recursive builder, which passes it to state()
 * @returns the starting state of the automaton
 */
SState * _stx_makeFA(T *t,int *statesP) {
    SState *start;
    Ptrlist *dangling;
    char *err;

    G_group_id = 0;
    err = __stx_makeFA(t,&amp;start,&amp;dangling,0,statesP);
    if (err) {raise_error0(err);}
    patch(dangling,&amp;matchstate,0);
    return start;
}</pre>
<h3>Unit Tests</h3>
<p>We'll look at the unit test for the above code.&nbsp; First, a couple of macros:</p>
<pre>/// macro to add a single symbol literal to semtrex tree
/// (calls __sl with parent t, not=0 and count=1, passing s as the only symbol)
#define _sl(t,s) __sl(t,0,1,s)

/// macro to add a single symbol literal not to semtrex tree
/// (calls __sl with parent t, not=1 and count=1, passing s as the only symbol)
#define _sln(t,s) __sl(t,1,1,s)</pre>
<p>These are helpers for calling a function to create a Semtrex symbol set:</p>
<pre>/**
 * utility function to create a semtrex literal symbol set
 *
 * Creates a SEMTREX_SYMBOL_LITERAL node (or SEMTREX_SYMBOL_LITERAL_NOT when
 * not is non-zero) under p and adds one SEMTREX_SYMBOL child per variadic
 * argument. With more than one symbol the children are grouped under an
 * intermediate SEMTREX_SYMBOL_SET node; otherwise they hang directly off the
 * literal node.
 *
 * @param p      parent node for the new literal
 * @param not    non-zero to create the negated literal
 * @param count  number of Symbol arguments that follow
 * @returns the literal node that was created
 */
T *__sl(T *p, int not,int count, ...) {
    T *literal = _t_newr(p,not ? SEMTREX_SYMBOL_LITERAL_NOT : SEMTREX_SYMBOL_LITERAL);
    T *holder = literal;
    va_list args;
    int remaining;

    // several symbols are wrapped in a set node; a single one is not
    if (count &gt; 1) holder = _t_newr(literal,SEMTREX_SYMBOL_SET);

    va_start(args,count);
    for(remaining=count;remaining&gt;0;remaining--) {
        _t_news(holder,SEMTREX_SYMBOL,va_arg(args,Symbol));
    }
    va_end(args);
    return literal;
}</pre>
<p>This is employed in the function that creates the test Semtrex:</p>
<pre>
T *_makeTestSemtrex1() {
// /TEST_STR_SYMBOL/(1/11/111),2,3
T *s = _sl(0,TEST_STR_SYMBOL);
T *ss = _t_newi(s,SEMTREX_SEQUENCE,0);
T *s1 = _sl(ss,sy1);
T *s11 = _sl(s1,sy11);
T *s111 = _sl(s11,sy111);
T *s2 = _sl(ss,sy2);
T *s3 = _sl(ss,sy3);
return s;
}
</pre>
<p>Utilized by the test function:</p>
<pre>/**
 * Unit test for _stx_makeFA: builds the FSA for the semtrex produced by
 * _makeTestSemtrex1, then checks the reported state count and follows the
 * out pointers state by state, checking each state's type, transition and
 * symbol until the match state is reached.
 */
void testMakeFA() {
SState *s1, *s2, *s3, *s4, *s5, *s6;
T *s = _makeTestSemtrex1();

// states is filled in by _stx_makeFA through the statesP pointer
int states = 0;
SState *sa = _stx_makeFA(s,&amp;states);
spec_is_equal(states,6);

// starting state: the root symbol literal, which leads down into its child semtrex
spec_state_equal(sa,StateSymbol,TransitionDown,TEST_STR_SYMBOL);

s1 = sa-&gt;out;
spec_state_equal(s1,StateSymbol,TransitionDown,sy1);

s2 = s1-&gt;out;
spec_state_equal(s2,StateSymbol,TransitionDown,sy11);

s3 = s2-&gt;out;
// NOTE(review): -2 is a raw transition value with no named constant;
// presumably it means popping up two levels after sy111 - confirm against patch()
spec_state_equal(s3,StateSymbol,-2,sy111);

s4 = s3-&gt;out;
spec_state_equal(s4,StateSymbol,TransitionNextChild,sy2);

s5 = s4-&gt;out;
spec_state_equal(s5,StateSymbol,TransitionUp,sy3);

// the chain must end at the match state, which has no successor
s6 = s5-&gt;out;
spec_is_equal(s6-&gt;type,StateMatch);

spec_is_ptr_equal(s6-&gt;out,NULL);

// release the automaton and the semtrex tree built for the test
_stx_freeFA(sa);
_t_free(s);
}</pre>
<h2>Semtrex Tokens - Detail Section for Regular Expression Geeks</h2>
<p>The format for this section describes each:</p>
<ul>
<li>Semantic symbol (the sub-header in this section)</li>
<li>Its textual representation</li>
<li>Its textual representation</li>
<li>What it matches</li>
<li>Example</li>
<li>Additional explanation</li>
<li>Example</li>
<li>Additional explanation</li>
</ul>
<h3>SEMTREX_SYMBOL_LITERAL</h3>
<p>The name of the symbol that it matches</p>
Expand Down

0 comments on commit 8b853f1

Please sign in to comment.