Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
sanjayaksaxena
committed
Oct 17, 2017
1 parent
123bec2
commit 20a66d3
Showing
6 changed files
with
288 additions
and
7 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,82 @@ | ||
// wink-distance | ||
// Distance functions for Bag of Words, Strings, | ||
// Vectors and more. | ||
// | ||
// Copyright (C) 2017 GRAYPE Systems Private Limited | ||
// | ||
// This file is part of “wink-distance”. | ||
// | ||
// “wink-distance” is free software: you can redistribute | ||
// it and/or modify it under the terms of the GNU Affero | ||
// General Public License as published by the Free | ||
// Software Foundation, version 3 of the License. | ||
// | ||
// “wink-distance” is distributed in the hope that it will | ||
// be useful, but WITHOUT ANY WARRANTY; without even | ||
// the implied warranty of MERCHANTABILITY or FITNESS | ||
// FOR A PARTICULAR PURPOSE. See the GNU Affero General | ||
// Public License for more details. | ||
// | ||
// You should have received a copy of the GNU Affero | ||
// General Public License along with “wink-distance”. | ||
// If not, see <http://www.gnu.org/licenses/>. | ||
|
||
var jaro = require( './string-jaro.js' ); | ||
// ## string | ||
|
||
// ### jaroWinkler
/**
 *
 * Computes the jaro winkler distance between two strings. It starts from the
 * jaro distance of the inputs and, if that distance does not exceed the
 * `boostThreshold`, reduces it in proportion to the number of leading
 * characters (at most 4) that the two strings share, multiplied by the
 * `scalingFactor`. Provided the underlying jaro distance lies between 0 and 1,
 * the result also lies between 0 and 1.
 *
 * @name string.jaroWinkler
 * @param {string} str1 — first string.
 * @param {string} str2 — second string.
 * @param {number} [boostThreshold=0.3] — upper limit on the jaro distance for
 * the prefix scaling to kick in: scaling is applied only if the jaro distance
 * between the input strings is less than or equal to this value. The absolute
 * value is used and any value > 1, is capped at 1 automatically.
 * @param {number} [scalingFactor=0.1] — is used to scale the distance.
 * Such scaling, if applied, is proportional to the number of shared
 * consecutive characters from the first character of `str1` and `str2`;
 * a maximum of 4 such characters are considered. The absolute value is used
 * and any value > 0.25, is capped at 0.25 automatically.
 * @return {number} jaro winkler distance between `str1` and `str2`.
 * @example
 * jaroWinkler( 'martha', 'marhta' );
 * // -> 0.03888888888888883
 * jaroWinkler( 'martha', 'marhta', 0.3, 0.2 );
 * // -> 0.022222222222222185
 * jaroWinkler( 'duane', 'dwayne' );
 * // -> 0.15999999999999992
 */
var jaroWinkler = function ( str1, str2, boostThreshold, scalingFactor ) {
  // Early exit! Identical inputs are at zero distance; `jaro()` is not called.
  if ( str1 === str2 ) return 0;
  // Setup default values if undefined.
  var sf = ( scalingFactor === undefined ) ? 0.1 : scalingFactor;
  var bt = ( boostThreshold === undefined ) ? 0.3 : boostThreshold;
  // Fix scaling factor & boost threshold, if required: use the magnitude and
  // cap it. With `sf <= 0.25` and a prefix of at most 4, the reduction
  // `l * sf` can never exceed 1, so the distance cannot turn negative.
  sf = Math.min( Math.abs( sf ), 0.25 );
  bt = Math.min( Math.abs( bt ), 1 );

  var distance = jaro( str1, str2 );

  // Strings that are too far apart get no prefix bonus.
  if ( distance > bt ) return distance;

  // Length of the common prefix, limited to 4 and to the shorter string.
  var pLimit = Math.min( str1.length, str2.length, 4 );
  var l = 0;

  for ( var i = 0; i < pLimit; i += 1 ) {
    if ( str1[ i ] === str2[ i ] ) {
      l += 1;
    } else {
      break;
    }
  }

  // Pull the distance towards 0 in proportion to the shared prefix length.
  distance -= ( l * sf * distance );

  return distance;
}; // jaroWinkler()
|
||
module.exports = jaroWinkler; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,55 @@ | ||
// wink-distance | ||
// Distance functions for Bag of Words, Strings, | ||
// Vectors and more. | ||
// | ||
// Copyright (C) 2017 GRAYPE Systems Private Limited | ||
// | ||
// This file is part of “wink-distance”. | ||
// | ||
// “wink-distance” is free software: you can redistribute | ||
// it and/or modify it under the terms of the GNU Affero | ||
// General Public License as published by the Free | ||
// Software Foundation, version 3 of the License. | ||
// | ||
// “wink-distance” is distributed in the hope that it will | ||
// be useful, but WITHOUT ANY WARRANTY; without even | ||
// the implied warranty of MERCHANTABILITY or FITNESS | ||
// FOR A PARTICULAR PURPOSE. See the GNU Affero General | ||
// Public License for more details. | ||
// | ||
// You should have received a copy of the GNU Affero | ||
// General Public License along with “wink-distance”. | ||
// If not, see <http://www.gnu.org/licenses/>. | ||
|
||
// | ||
var chai = require( 'chai' ); | ||
var mocha = require( 'mocha' ); | ||
var jaroWinkler = require( '../src/wink-distance.js' ).string.jaroWinkler; | ||
|
||
var expect = chai.expect; | ||
var describe = mocha.describe; | ||
var it = mocha.it; | ||
|
||
// Table-driven checks for `jaroWinkler`. Each case is called with only the two
// strings (the optional arguments are left undefined) and the result is
// rounded to 3 decimal places before being compared with the expected value.
describe( 'string-jaro-winkler normal behaviour', function () {
  var tests = [
    { whenInputIs: { str1: 'SHACKLEFORD', str2: 'SHACKELFORD' }, expectedOutputIs: 0.018 },
    { whenInputIs: { str1: 'DUNNINGHAM', str2: 'CUNNIGHAM' }, expectedOutputIs: 0.104 },
    { whenInputIs: { str1: 'JONES', str2: 'JOHNSON' }, expectedOutputIs: 0.168 },
    { whenInputIs: { str1: 'MASSEY', str2: 'MASSIE' }, expectedOutputIs: 0.067 },
    { whenInputIs: { str1: 'ABROMS', str2: 'ABRAMS' }, expectedOutputIs: 0.078 },
    { whenInputIs: { str1: 'DWAYNE', str2: 'DUANE' }, expectedOutputIs: 0.160 },
    { whenInputIs: { str1: 'SEAN', str2: 'SUSAN' }, expectedOutputIs: 0.195 },
    { whenInputIs: { str1: 'MICHELLE', str2: 'MICHAEL' }, expectedOutputIs: 0.079 },
    { whenInputIs: { str1: 'MARHTA', str2: 'MARTHA' }, expectedOutputIs: 0.039 },
    { whenInputIs: { str1: 'TANYA', str2: 'TONYA' }, expectedOutputIs: 0.120 },
    // Nothing in common.
    { whenInputIs: { str1: 'sat', str2: 'urn' }, expectedOutputIs: 1 },
    // Identical inputs, including two empty strings.
    { whenInputIs: { str1: 'saturn', str2: 'saturn' }, expectedOutputIs: 0 },
    { whenInputIs: { str1: '', str2: '' }, expectedOutputIs: 0 },
  ];

  tests.forEach( function ( test ) {
    it( 'should return ' + JSON.stringify( test.expectedOutputIs ) + ' if the input is ' + JSON.stringify( test.whenInputIs ), function () {
      // Unary `+` converts the `toFixed( 3 )` string back into a number.
      expect( +jaroWinkler( test.whenInputIs.str1, test.whenInputIs.str2 ).toFixed( 3 ) ).to.equal( test.expectedOutputIs );
    } );
  } );
} );