Skip to content

Commit

Permalink
feat: improve hash tag detection
Browse files Browse the repository at this point in the history
references #41
  • Loading branch information
sanjayaksaxena committed Jan 27, 2022
1 parent 2af4740 commit 86b1ee3
Show file tree
Hide file tree
Showing 5 changed files with 17 additions and 15 deletions.
10 changes: 5 additions & 5 deletions docs/Tokenizer.html
Expand Up @@ -241,7 +241,7 @@ <h4 class="name" id="addRegex">
<!--
<dt class="tag-source">Source:</dt>
<dd class="tag-source"><ul class="dummy"><li>
<a href="wink-tokenizer.js.html">wink-tokenizer.js</a>, <a href="wink-tokenizer.js.html#line419">line 419</a>
<a href="wink-tokenizer.js.html">wink-tokenizer.js</a>, <a href="wink-tokenizer.js.html#line420">line 420</a>
</li></ul></dd>
-->

Expand Down Expand Up @@ -517,7 +517,7 @@ <h4 class="name" id="defineConfig">
<!--
<dt class="tag-source">Source:</dt>
<dd class="tag-source"><ul class="dummy"><li>
<a href="wink-tokenizer.js.html">wink-tokenizer.js</a>, <a href="wink-tokenizer.js.html#line269">line 269</a>
<a href="wink-tokenizer.js.html">wink-tokenizer.js</a>, <a href="wink-tokenizer.js.html#line270">line 270</a>
</li></ul></dd>
-->

Expand Down Expand Up @@ -1296,7 +1296,7 @@ <h4 class="name" id="getTokensFP">
<!--
<dt class="tag-source">Source:</dt>
<dd class="tag-source"><ul class="dummy"><li>
<a href="wink-tokenizer.js.html">wink-tokenizer.js</a>, <a href="wink-tokenizer.js.html#line383">line 383</a>
<a href="wink-tokenizer.js.html">wink-tokenizer.js</a>, <a href="wink-tokenizer.js.html#line384">line 384</a>
</li></ul></dd>
-->

Expand Down Expand Up @@ -1424,7 +1424,7 @@ <h4 class="name" id="tokenize">
<!--
<dt class="tag-source">Source:</dt>
<dd class="tag-source"><ul class="dummy"><li>
<a href="wink-tokenizer.js.html">wink-tokenizer.js</a>, <a href="wink-tokenizer.js.html#line350">line 350</a>
<a href="wink-tokenizer.js.html">wink-tokenizer.js</a>, <a href="wink-tokenizer.js.html#line351">line 351</a>
</li></ul></dd>
-->

Expand Down Expand Up @@ -1617,7 +1617,7 @@ <h2><a href="index.html">Summary</a></h2><h2><a href="https://github.com/winkjs/
<br class="clear">

<footer>
Documentation generated by <a href="https://github.com/jsdoc3/jsdoc">JSDoc 3.6.10</a> on Thu Jan 27 2022 18:17:54 GMT+0530 (India Standard Time) using the <a href="https://github.com/clenemt/docdash">docdash</a> theme.
Documentation generated by <a href="https://github.com/jsdoc3/jsdoc">JSDoc 3.6.10</a> on Thu Jan 27 2022 20:39:38 GMT+0530 (India Standard Time) using the <a href="https://github.com/clenemt/docdash">docdash</a> theme.
</footer>

<script>prettyPrint();</script>
Expand Down
4 changes: 2 additions & 2 deletions docs/global.html
Expand Up @@ -287,7 +287,7 @@ <h4 class="name" id="tokenizer">
<!--
<dt class="tag-source">Source:</dt>
<dd class="tag-source"><ul class="dummy"><li>
<a href="wink-tokenizer.js.html">wink-tokenizer.js</a>, <a href="wink-tokenizer.js.html#line122">line 122</a>
<a href="wink-tokenizer.js.html">wink-tokenizer.js</a>, <a href="wink-tokenizer.js.html#line123">line 123</a>
</li></ul></dd>
-->

Expand Down Expand Up @@ -418,7 +418,7 @@ <h2><a href="index.html">Summary</a></h2><h2><a href="https://github.com/winkjs/
<br class="clear">

<footer>
Documentation generated by <a href="https://github.com/jsdoc3/jsdoc">JSDoc 3.6.10</a> on Thu Jan 27 2022 18:17:54 GMT+0530 (India Standard Time) using the <a href="https://github.com/clenemt/docdash">docdash</a> theme.
Documentation generated by <a href="https://github.com/jsdoc3/jsdoc">JSDoc 3.6.10</a> on Thu Jan 27 2022 20:39:38 GMT+0530 (India Standard Time) using the <a href="https://github.com/clenemt/docdash">docdash</a> theme.
</footer>

<script>prettyPrint();</script>
Expand Down
2 changes: 1 addition & 1 deletion docs/index.html
Expand Up @@ -322,7 +322,7 @@ <h2><a href="index.html">Summary</a></h2><h2><a href="https://github.com/winkjs/
<br class="clear">

<footer>
Documentation generated by <a href="https://github.com/jsdoc3/jsdoc">JSDoc 3.6.10</a> on Thu Jan 27 2022 18:17:54 GMT+0530 (India Standard Time) using the <a href="https://github.com/clenemt/docdash">docdash</a> theme.
Documentation generated by <a href="https://github.com/jsdoc3/jsdoc">JSDoc 3.6.10</a> on Thu Jan 27 2022 20:39:38 GMT+0530 (India Standard Time) using the <a href="https://github.com/clenemt/docdash">docdash</a> theme.
</footer>

<script>prettyPrint();</script>
Expand Down
9 changes: 5 additions & 4 deletions docs/wink-tokenizer.js.html
Expand Up @@ -231,9 +231,10 @@ <h1 class="page-title">wink-tokenizer.js</h1>
var rgxNumberDV = /[\u0966-\u096F]+\/[\u0966-\u096F]+|[\u0966-\u096F](?:[\.,-\/]?[\u0966-\u096F])*(?:\.[\u0966-\u096F]+)?/g;
var rgxMention = /@\w+/g;
// Latin-1 Hashtags.
var rgxHashtagL1 = /#[a-z][a-z0-9]*/gi;
// Devanagari Hashtags; include Latin-1 as well.
var rgxHashtagDV = /#[\u0900-\u0963\u0970-\u097F][\u0900-\u0963\u0970-\u097F\u0966-\u096F0-9]*/gi;
// Include entire Latin-1 script and not just English alphas.
var rgxHashtagL1 = /#[a-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u00FF_][a-z0-9\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u00FF_]*/gi;
// Devanagari Hashtags
var rgxHashtagDV = /#[\u0900-\u0963\u0970-\u097F_][\u0900-\u0963\u0970-\u097F\u0966-\u096F0-9_]*/gi;
// EMail is EN character set.
var rgxEmail = /[-!#$%&amp;'*+\/=?^\w{|}~](?:\.?[-!#$%&amp;'*+\/=?^\w`{|}~])*@[a-z0-9](?:-?\.?[a-z0-9])*(?:\.[a-z](?:-?[a-z0-9])*)+/gi;
// Bitcoin, Ruble, Indian Rupee, Other Rupee, Dollar, Pound, Yen, Euro, Won.
Expand Down Expand Up @@ -693,7 +694,7 @@ <h2><a href="index.html">Summary</a></h2><h2><a href="https://github.com/winkjs/
<br class="clear">

<footer>
Documentation generated by <a href="https://github.com/jsdoc3/jsdoc">JSDoc 3.6.10</a> on Thu Jan 27 2022 18:17:54 GMT+0530 (India Standard Time) using the <a href="https://github.com/clenemt/docdash">docdash</a> theme.
Documentation generated by <a href="https://github.com/jsdoc3/jsdoc">JSDoc 3.6.10</a> on Thu Jan 27 2022 20:39:38 GMT+0530 (India Standard Time) using the <a href="https://github.com/clenemt/docdash">docdash</a> theme.
</footer>

<script>prettyPrint();</script>
Expand Down
7 changes: 4 additions & 3 deletions src/wink-tokenizer.js
Expand Up @@ -39,9 +39,10 @@ var rgxNumberL1 = /\d+\/\d+|\d(?:[\.,-\/]?\d)*(?:\.\d+)?/g;
var rgxNumberDV = /[\u0966-\u096F]+\/[\u0966-\u096F]+|[\u0966-\u096F](?:[\.,-\/]?[\u0966-\u096F])*(?:\.[\u0966-\u096F]+)?/g;
var rgxMention = /@\w+/g;
// Latin-1 Hashtags.
var rgxHashtagL1 = /#[a-z][a-z0-9]*/gi;
// Devanagari Hashtags; include Latin-1 as well.
var rgxHashtagDV = /#[\u0900-\u0963\u0970-\u097F][\u0900-\u0963\u0970-\u097F\u0966-\u096F0-9]*/gi;
// Include entire Latin-1 script and not just English alphas.
var rgxHashtagL1 = /#[a-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u00FF_][a-z0-9\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u00FF_]*/gi;
// Devanagari Hashtags
var rgxHashtagDV = /#[\u0900-\u0963\u0970-\u097F_][\u0900-\u0963\u0970-\u097F\u0966-\u096F0-9_]*/gi;
// EMail is EN character set.
var rgxEmail = /[-!#$%&'*+\/=?^\w{|}~](?:\.?[-!#$%&'*+\/=?^\w`{|}~])*@[a-z0-9](?:-?\.?[a-z0-9])*(?:\.[a-z](?:-?[a-z0-9])*)+/gi;
// Bitcoin, Ruble, Indian Rupee, Other Rupee, Dollar, Pound, Yen, Euro, Won.
Expand Down

0 comments on commit 86b1ee3

Please sign in to comment.