-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
9 changed files
with
1,130 additions
and
441 deletions.
There are no files selected for viewing
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,80 @@ | ||
import Metric from './metric' | ||
import JaroCostOptions from './../interfaces/jaro-opts.interface' | ||
|
||
/**
 * Jaro string metric.
 *
 * `similarity` returns a value in [0, 1] where 1 means the strings are
 * identical and 0 means they share no matching characters; `distance` is
 * its complement (`1 - similarity`).
 */
class Jaro extends Metric
{
    constructor (name: string = 'Jaro')
    {
        super(name)
    }

    /**
     * Jaro similarity.
     *
     * Two characters match when they are equal and their positions differ by
     * at most `floor(max(|source|, |target|) / 2) - 1` (never less than 0).
     * The score is the mean of `m/|source|`, `m/|target|` and `(m - t/2)/m`,
     * where `m` is the number of matches and `t` the number of matched
     * positions whose characters disagree when both strings are read in order.
     *
     * @param source First string.
     * @param target Second string.
     * @param cost Cost options; accepted for signature compatibility and not
     *             read by this implementation.
     * @returns 1 when both strings are empty, 0 when there is no match,
     *          otherwise the Jaro similarity.
     */
    public similarity(source: string, target: string, { deletionCost, insertionCost, substitutionCost }: JaroCostOptions = {}) : number
    {
        const sourceLength: number = source.length;
        const targetLength: number = target.length;

        if (sourceLength === 0 && targetLength === 0)
        {
            return 1;
        }

        // The window must be a non-negative integer: a fractional value makes
        // the inner index fractional (so target[j] is undefined and matches
        // are missed), and a negative one makes the window empty, which would
        // score single-character identical strings as 0.
        const matchDistance: number = Math.max(0, Math.floor(Math.max(sourceLength, targetLength) / 2) - 1);
        const sourceMatches: boolean[] = new Array<boolean>(sourceLength).fill(false);
        const targetMatches: boolean[] = new Array<boolean>(targetLength).fill(false);

        let matches: number = 0;

        for (let i = 0; i < sourceLength; i++)
        {
            const start: number = Math.max(0, i - matchDistance);
            const end: number = Math.min(i + matchDistance + 1, targetLength);

            for (let j = start; j < end; j++)
            {
                // Each target character can be claimed by one source character only.
                if (targetMatches[j]) continue;
                if (source[i] === target[j])
                {
                    sourceMatches[i] = true;
                    targetMatches[j] = true;
                    matches++;
                    break;
                }
            }
        }

        if (matches === 0) return 0;

        // Walk both strings' matched characters in order; every disagreement
        // counts as one (half) transposition.
        let transpositions: number = 0;
        let k: number = 0;

        for (let i = 0; i < sourceLength; i++)
        {
            if (!sourceMatches[i]) continue;
            // Safe: target has exactly as many matched flags as source.
            while (!targetMatches[k]) k++;
            if (source[i] !== target[k]) transpositions++;
            k++;
        }

        return ((matches / sourceLength) + (matches / targetLength) + ((matches - transpositions / 2) / matches)) / 3;
    }

    /**
     * Jaro distance, defined as `1 - similarity(source, target)`.
     *
     * @param source First string.
     * @param target Second string.
     * @param cost Cost options forwarded to `similarity`.
     * @returns A value in [0, 1]; 0 means the strings are identical.
     */
    public distance (source: string, target: string, { deletionCost, insertionCost, substitutionCost }: JaroCostOptions = {} ): number
    {
        return 1 - this.similarity(source, target, {deletionCost, insertionCost, substitutionCost});
    }
}
|
||
export default Jaro |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,53 @@ | ||
import Jaro from './jaro' | ||
import JaroCostOptions from './../interfaces/jaro-opts.interface' | ||
|
||
/**
 * Jaro-Winkler string metric: the Jaro similarity boosted for strings that
 * share a common prefix.
 */
class JaroWinkler extends Jaro
{
    constructor(name: string = 'Jaro_Winkler')
    {
        super(name);
    }

    /**
     * Jaro-Winkler similarity: `j + l * p * (1 - j)`, where `j` is the Jaro
     * similarity, `l` the length of the common prefix (capped by
     * `lambdaCost`, default 4) and `p` the scaling factor (`roCost`,
     * default 0.1).
     *
     * Note: with a prefix cap above 4 the product `l * p` can exceed 1, in
     * which case the result may be greater than 1.
     *
     * @param source First string.
     * @param target Second string.
     * @param cost Options; `roCost` is the scaling factor `p`, `lambdaCost`
     *             the maximum prefix length, the remaining costs are
     *             forwarded to the Jaro similarity.
     * @returns The Jaro-Winkler similarity.
     * @throws Error when `p` is not within [0, 0.25].
     */
    public similarity(source: string, target: string, {insertionCost, deletionCost, substitutionCost, lambdaCost, roCost}: JaroCostOptions = {}): number
    {
        const p: number = roCost ?? 0.1;

        // Written as a negated range check so that NaN is rejected as well.
        if (!(0 <= p && p <= 0.25))
        {
            throw new Error("The p parameter must be between 0 and 0.25");
        }

        // Never look past the end of either string when measuring the prefix.
        const maxL: number = Math.min(lambdaCost ?? 4, source.length, target.length);
        let l: number = 0;

        while (l < maxL && source[l] === target[l])
        {
            l++;
        }

        const j: number = super.similarity(source, target, {insertionCost, deletionCost, substitutionCost});
        return j + l * p * (1 - j);
    }

    /**
     * Jaro-Winkler distance, defined as `1 - similarity(source, target)`.
     *
     * @param source First string.
     * @param target Second string.
     * @param cost Options forwarded unchanged to `similarity`.
     * @returns The complement of the Jaro-Winkler similarity.
     * @throws Error when the scaling factor is not within [0, 0.25].
     */
    public distance(source: string, target: string, {insertionCost, deletionCost, substitutionCost, lambdaCost, roCost}: JaroCostOptions = {}): number
    {
        return 1 - this.similarity(source, target, {insertionCost, deletionCost, substitutionCost, lambdaCost, roCost});
    }
}
export default JaroWinkler; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,7 +1,11 @@ | ||
import Levenshtein from './hermetrics/levenshtein' | ||
import Metric from './hermetrics/metric' | ||
import Jaro from './hermetrics/jaro' | ||
import JaroWinkler from './hermetrics/jaro_winkler' | ||
|
||
export { | ||
Levenshtein, | ||
Metric | ||
Metric, | ||
Jaro, | ||
JaroWinkler | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
/**
 * Options accepted by the Jaro and Jaro-Winkler metrics.
 * Every field is optional; callers may pass an empty object.
 */
export default interface JaroCostOptions {
    /** Deletion cost; accepted and forwarded by the Jaro metrics, which do not read it. */
    deletionCost?: number
    /** Insertion cost; accepted and forwarded by the Jaro metrics, which do not read it. */
    insertionCost?: number
    /** Substitution cost; accepted and forwarded by the Jaro metrics, which do not read it. */
    substitutionCost?: number
    /** Maximum common-prefix length counted by Jaro-Winkler (falls back to 4 when omitted). */
    lambdaCost?: number
    /** Jaro-Winkler prefix scaling factor `p` (falls back to 0.1 when omitted; checked against [0, 0.25]). */
    roCost?: number
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
import { describe, it } from 'mocha' | ||
import { expect } from 'chai' | ||
import Jaro from '../../src/hermetrics/jaro' | ||
|
||
// Mocha suite for the Jaro metric; values are compared after rounding to
// three decimals to avoid floating-point noise.
describe('Jaro Metric', function()
{
    describe('Distance tests', function()
    {
        it('should return 0.278 for abcd - abe', function()
        {
            const jaro = new Jaro();
            const distance = jaro.distance('abcd', 'abe');
            expect(distance.toFixed(3)).equal('0.278');
        });
    });

    describe('Similarity tests', function()
    {
        it('should return 0.722 for abcd - abe', function()
        {
            const jaro = new Jaro();
            const similarity = jaro.similarity('abcd', 'abe');
            expect(similarity.toFixed(3)).equal('0.722');
        });
    });
});
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
import { describe, it } from 'mocha' | ||
import { expect } from 'chai' | ||
import JaroWinkler from '../../src/hermetrics/jaro_winkler'; | ||
|
||
// Mocha suite for the Jaro-Winkler metric; values are compared after
// rounding to three decimals to avoid floating-point noise.
describe('Jaro Winkler Metric', function()
{
    describe('Distance tests', function()
    {
        it('should return 0.222 for abcd - abe', function()
        {
            const jaw = new JaroWinkler();
            const distance = jaw.distance('abcd', 'abe');
            expect(distance.toFixed(3)).equal('0.222');
        });
    });

    describe('Similarity tests', function()
    {
        it('should return 0.778 for abcd - abe', function()
        {
            const jaw = new JaroWinkler();
            const similarity = jaw.similarity('abcd', 'abe');
            expect(similarity.toFixed(3)).equal('0.778');
        });

        it('should return 0.750 for abcd - abe with ro = 0.05', function()
        {
            const jaw = new JaroWinkler();
            const similarity = jaw.similarity('abcd', 'abe', {roCost: 0.05});
            expect(similarity.toFixed(3)).equal('0.750');
        });

        // Title corrected: the asserted value for ro = 0.15 is 0.806.
        it('should return 0.806 for abcd - abe with ro = 0.15', function()
        {
            const jaw = new JaroWinkler();
            const similarity = jaw.similarity('abcd', 'abe', {roCost: 0.15});
            expect(similarity.toFixed(3)).equal('0.806');
        });

        // Title corrected: the asserted value for ro = 0.25 is 0.861.
        it('should return 0.861 for abcd - abe with ro = 0.25', function()
        {
            const jaw = new JaroWinkler();
            const similarity = jaw.similarity('abcd', 'abe', {roCost: 0.25});
            expect(similarity.toFixed(3)).equal('0.861');
        });
    });
});