Retext implementation of the Soundex algorithm.
npm:
npm install retext-soundex
retext-soundex is also available for bower, component, and duo, and as an AMD, CommonJS, and globals module, uncompressed and compressed.
var retext = require('retext');
var inspect = require('unist-util-inspect');
var soundex = require('retext-soundex');
/**
 * Plugin attacher: hands retext a transformer that prints the
 * processed syntax tree to the console.
 */
function logTree() {
    return function (tree) {
        console.log(inspect(tree));
    };
}

/* Annotate every word with its Soundex code, then print the tree. */
retext()
    .use(soundex)
    .use(logTree)
    .process('A simple English sentence.');
Yields:
RootNode[1]
└─ ParagraphNode[1]
└─ SentenceNode[8]
├─ WordNode[1] [data={"phonetics":"A000"}]
│ └─ TextNode: 'A'
├─ WhiteSpaceNode: ' '
├─ WordNode[1] [data={"phonetics":"S514"}]
│ └─ TextNode: 'simple'
├─ WhiteSpaceNode: ' '
├─ WordNode[1] [data={"phonetics":"E524"}]
│ └─ TextNode: 'English'
├─ WhiteSpaceNode: ' '
├─ WordNode[1] [data={"phonetics":"S535"}]
│ └─ TextNode: 'sentence'
└─ PunctuationNode: '.'
You can also combine it with a stemmer (such as retext-porter-stemmer or retext-lancaster-stemmer).
var retext = require('retext');
var inspect = require('unist-util-inspect');
var soundex = require('retext-soundex');
var stemmer = require('retext-porter-stemmer');
/**
 * Plugin attacher: hands retext a transformer that prints the
 * processed syntax tree to the console.
 */
function logStemmedTree() {
    return function (tree) {
        console.log(inspect(tree));
    };
}

/* The stemmer is attached before soundex; the tree is printed last. */
retext()
    .use(stemmer)
    .use(soundex)
    .use(logStemmedTree)
    .process('A detestable paragraph.');
Yields:
RootNode[1]
└─ ParagraphNode[1]
└─ SentenceNode[6]
├─ WordNode[1] [data={"stem":"a","phonetics":"A000","stemmedPhonetics":"A000"}]
│ └─ TextNode: 'A'
├─ WhiteSpaceNode: ' '
├─ WordNode[1] [data={"stem":"detest","phonetics":"D323","stemmedPhonetics":"D323"}]
│ └─ TextNode: 'detestable'
├─ WhiteSpaceNode: ' '
├─ WordNode[1] [data={"stem":"paragraph","phonetics":"P626","stemmedPhonetics":"P626"}]
│ └─ TextNode: 'paragraph'
└─ PunctuationNode: '.'
None: retext-soundex has no API of its own. It automatically detects the phonetics of each WordNode (using wooorm/soundex-code) and stores them in node.data.phonetics. If a stemmer is used, the stemmed phonetics are stored in node.data.stemmedPhonetics.