Skip to content

Commit

Permalink
feat(hermetrics): add jaro metrics
Browse files Browse the repository at this point in the history
  • Loading branch information
weylermaldonado committed Mar 5, 2020
2 parents b998cdd + 4b91b8b commit b3d622d
Show file tree
Hide file tree
Showing 9 changed files with 1,130 additions and 441 deletions.
1,304 changes: 869 additions & 435 deletions package-lock.json

Large diffs are not rendered by default.

80 changes: 80 additions & 0 deletions src/hermetrics/jaro.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
import Metric from './metric'
import JaroCostOptions from './../interfaces/jaro-opts.interface'

/**
 * Jaro string metric.
 *
 * Similarity is a value in [0, 1] where 1 means the strings are identical and
 * 0 means they share no matching characters; distance is `1 - similarity`.
 */
class Jaro extends Metric
{
  constructor (name: string = 'Jaro')
  {
    super(name)
  }

  /**
   * Jaro similarity.
   *
   * Two characters "match" when they are equal and their positions differ by
   * at most `floor(max(|source|, |target|) / 2) - 1`. The score combines the
   * fraction of matched characters in each string with the fraction of
   * matches that are not transposed.
   *
   * @param source first string
   * @param target second string
   * @param cost accepted for signature compatibility with the other metrics;
   *             the edit costs are not used by the Jaro computation
   * @returns similarity in [0, 1]; 1 when both strings are empty, 0 when
   *          exactly one is empty or no characters match
   */
  public similarity (source: string, target: string, { deletionCost, insertionCost, substitutionCost }: JaroCostOptions = {}): number
  {
    const sourceLength = source.length
    const targetLength = target.length

    if (sourceLength === 0 && targetLength === 0)
    {
      return 1
    }
    if (sourceLength === 0 || targetLength === 0)
    {
      return 0
    }

    // The window must be an integer: a fractional value makes the inner loop
    // step over non-integer indices and never find a match. It is also
    // clamped at 0 so that single-character strings can still match.
    const matchDistance = Math.max(0, Math.floor(Math.max(sourceLength, targetLength) / 2) - 1)

    const sourceMatches: boolean[] = new Array<boolean>(sourceLength).fill(false)
    const targetMatches: boolean[] = new Array<boolean>(targetLength).fill(false)

    let matches = 0

    for (let i = 0; i < sourceLength; i++)
    {
      const start = Math.max(0, i - matchDistance)
      const end = Math.min(i + matchDistance + 1, targetLength)

      for (let j = start; j < end; j++)
      {
        // Each target character may be paired with at most one source character.
        if (targetMatches[j] || source[i] !== target[j]) continue

        sourceMatches[i] = true
        targetMatches[j] = true
        matches++
        break
      }
    }

    if (matches === 0) return 0

    // Walk the matched characters of both strings in order; every position
    // where they disagree is half of a transposition.
    let transpositions = 0
    let k = 0

    for (let i = 0; i < sourceLength; i++)
    {
      if (!sourceMatches[i]) continue
      while (!targetMatches[k]) k++
      if (source[i] !== target[k]) transpositions++
      k++
    }

    return (
      matches / sourceLength +
      matches / targetLength +
      (matches - transpositions / 2) / matches
    ) / 3
  }

  /**
   * Jaro distance, defined as `1 - similarity`.
   *
   * @param source first string
   * @param target second string
   * @param cost forwarded unchanged to {@link Jaro.similarity}
   * @returns distance in [0, 1]
   */
  public distance (source: string, target: string, { deletionCost, insertionCost, substitutionCost }: JaroCostOptions = {}): number
  {
    return 1 - this.similarity(source, target, { deletionCost, insertionCost, substitutionCost })
  }
}

export default Jaro
53 changes: 53 additions & 0 deletions src/hermetrics/jaro_winkler.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
import Jaro from './jaro'
import JaroCostOptions from './../interfaces/jaro-opts.interface'

/**
 * Jaro-Winkler string metric: the Jaro similarity boosted for strings that
 * share a common prefix.
 */
class JaroWinkler extends Jaro
{
  constructor (name: string = 'Jaro_Winkler')
  {
    super(name)
  }

  /**
   * Jaro-Winkler similarity: `j + l * p * (1 - j)`, where `j` is the Jaro
   * similarity, `l` the length of the common prefix (capped by `lambdaCost`)
   * and `p` the prefix scaling factor (`roCost`).
   *
   * @param source first string
   * @param target second string
   * @param cost `lambdaCost` is the maximum prefix length considered
   *             (default 4); `roCost` is the scaling factor p (default 0.1).
   *             The edit costs are forwarded to the Jaro similarity.
   *             Note that `lambdaCost * roCost` above 1 can push the result
   *             past 1.
   * @returns the Jaro-Winkler similarity
   * @throws Error when p is not within [0, 0.25] (including NaN)
   */
  public similarity (source: string, target: string, { insertionCost, deletionCost, substitutionCost, lambdaCost, roCost }: JaroCostOptions = {}): number
  {
    const p = roCost ?? 0.1

    if (!(0 <= p && p <= 0.25))
    {
      // Previously the Error was constructed but never thrown, so invalid
      // values were silently accepted.
      throw new Error("The p parameter must be between 0 and 0.25")
    }

    // Never look past the end of either string: comparing two out-of-range
    // characters (undefined vs undefined) would wrongly extend the prefix.
    const maxPrefix = Math.min(lambdaCost ?? 4, source.length, target.length)

    let prefixLength = 0
    while (prefixLength < maxPrefix && source[prefixLength] === target[prefixLength])
    {
      prefixLength++
    }

    const jaro = super.similarity(source, target, { insertionCost, deletionCost, substitutionCost })
    return jaro + prefixLength * p * (1 - jaro)
  }

  /**
   * Jaro-Winkler distance, defined as `1 - similarity`.
   *
   * @param source first string
   * @param target second string
   * @param cost forwarded unchanged to {@link JaroWinkler.similarity}
   * @returns the Jaro-Winkler distance
   */
  public distance (source: string, target: string, { insertionCost, deletionCost, substitutionCost, lambdaCost, roCost }: JaroCostOptions = {}): number
  {
    return 1 - this.similarity(source, target, { insertionCost, deletionCost, substitutionCost, lambdaCost, roCost })
  }
}
export default JaroWinkler;
1 change: 1 addition & 0 deletions src/hermetrics/levenshtein.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ class Levenshtein extends Metric {
}

public distance (source: string, target: string, { deletionCost, insertionCost, substitutionCost }: LevenshteinCostOptions = {}): number {

const sourceLength: number = source.length
const targetLength: number = target.length
const removeCost: number = deletionCost ?? 1
Expand Down
49 changes: 44 additions & 5 deletions src/hermetrics/metric.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,20 +3,59 @@ import LevenshteinCostOptions from '../interfaces/levenshtein-opts.interface'
/**
 * Base class for string metrics.
 *
 * Provides a trivial equality-based distance plus the normalization helpers
 * that concrete metrics reuse by overriding `distance`, `minDistance` and
 * `maxDistance`.
 */
class Metric {
  private readonly _name: string;

  constructor (name = 'Generic')
  {
    this._name = name
  }

  /**
   * Distance between two strings: 0 when they are equal, 1 otherwise.
   *
   * @param source first string
   * @param target second string
   * @param cost edit costs; not used by this base implementation
   */
  public distance (source: string, target: string, { deletionCost, insertionCost, substitutionCost }: LevenshteinCostOptions = {}): number
  {
    return source === target ? 0 : 1
  }

  /**
   * Largest distance the metric can produce for the given pair: 0 when both
   * strings are empty, 1 otherwise.
   *
   * @param source first string
   * @param target second string
   * @param cost edit costs; not used by this base implementation
   */
  public maxDistance (source: string, target: string, { deletionCost, insertionCost, substitutionCost }: LevenshteinCostOptions = {}): number
  {
    return (source.length === 0 && target.length === 0) ? 0 : 1
  }

  /**
   * Smallest distance the metric can produce for the given pair; always 0 in
   * this base implementation.
   *
   * @param source first string
   * @param target second string
   * @param cost edit costs; not used by this base implementation
   */
  public minDistance (source: string, target: string, { deletionCost, insertionCost, substitutionCost }: LevenshteinCostOptions = {}): number
  {
    return 0
  }

  /**
   * Linearly rescales `x` from the range [low, high] to [0, 1], clamping
   * values outside the range.
   *
   * @param x value to normalize
   * @param low lower bound of the range (default 0)
   * @param high upper bound of the range (default 1)
   * @returns 0 when the range is empty or inverted (`high <= low`) or when
   *          `x <= low`; 1 when `x >= high`; otherwise `(x - low) / (high - low)`
   */
  public normalize (x: number, low: number = 0, high: number = 1): number
  {
    if (high <= low) {
      return 0
    }
    if (x >= high) {
      return 1
    }
    if (x <= low) {
      return 0
    }

    return (x - low) / (high - low)
  }

  /**
   * Distance rescaled to [0, 1] using this metric's own minimum and maximum
   * distance for the given pair.
   *
   * @param source first string
   * @param target second string
   * @param cost edit costs forwarded to `distance`, `minDistance` and `maxDistance`
   */
  public normalizedDistance (source: string, target: string, { deletionCost, insertionCost, substitutionCost }: LevenshteinCostOptions = {}): number
  {
    const x = this.distance(source, target, { deletionCost, insertionCost, substitutionCost })
    const min = this.minDistance(source, target, { deletionCost, insertionCost, substitutionCost })
    const max = this.maxDistance(source, target, { deletionCost, insertionCost, substitutionCost })
    return this.normalize(x, min, max)
  }
}

export default Metric
6 changes: 5 additions & 1 deletion src/index.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,11 @@
import Levenshtein from './hermetrics/levenshtein'
import Metric from './hermetrics/metric'
import Jaro from './hermetrics/jaro'
import JaroWinkler from './hermetrics/jaro_winkler'

// Public API of the package: the base metric plus every concrete metric.
export {
  Levenshtein,
  Metric,
  Jaro,
  JaroWinkler
}
7 changes: 7 additions & 0 deletions src/interfaces/jaro-opts.interface.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
/**
 * Options accepted by the Jaro and Jaro-Winkler metrics. Every field is optional.
 */
export default interface JaroCostOptions {
/** Deletion cost; accepted and forwarded by the Jaro classes but not read by the Jaro similarity computation. */
deletionCost?: number
/** Insertion cost; accepted and forwarded by the Jaro classes but not read by the Jaro similarity computation. */
insertionCost?: number
/** Substitution cost; accepted and forwarded by the Jaro classes but not read by the Jaro similarity computation. */
substitutionCost?: number
/** Jaro-Winkler only: maximum number of leading characters compared for the common prefix; defaults to 4 when omitted. */
lambdaCost?: number
/** Jaro-Winkler only: prefix scaling factor p; defaults to 0.1 when omitted and is expected to lie in [0, 0.25]. */
roCost?: number
}
25 changes: 25 additions & 0 deletions tests/unit/jaro.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
import { describe, it } from 'mocha'
import { expect } from 'chai'
import Jaro from '../../src/hermetrics/jaro'

// Unit tests for the Jaro metric, comparing results rounded to three decimals.
describe('Jaro Metric', () => {
  describe('Distance tests', () => {
    it('should return 0.278 for abcd - abe', () => {
      const metric = new Jaro()
      const rounded = metric.distance('abcd', 'abe').toFixed(3)
      expect(rounded).to.equal('0.278')
    })
  })

  describe('Similarity tests', () => {
    it('should return 0.722 for abcd - abe', () => {
      const metric = new Jaro()
      const rounded = metric.similarity('abcd', 'abe').toFixed(3)
      expect(rounded).to.equal('0.722')
    })
  })
})
46 changes: 46 additions & 0 deletions tests/unit/jaro_winkler.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
import { describe, it } from 'mocha'
import { expect } from 'chai'
import JaroWinkler from '../../src/hermetrics/jaro_winkler';

// Unit tests for the Jaro-Winkler metric, comparing results rounded to three
// decimals. Test titles state the value each assertion actually expects.
describe('Jaro Winkler Metric', function()
{
  describe('Distance tests', function()
  {
    it('should return 0.222 for abcd - abe', function()
    {
      const jaw = new JaroWinkler();
      const distance = jaw.distance('abcd', 'abe');
      expect(distance.toFixed(3)).equal('0.222');
    });
  });

  describe('Similarity tests', function()
  {
    it('should return 0.778 for abcd - abe', function()
    {
      const jaw = new JaroWinkler();
      const similarity = jaw.similarity('abcd', 'abe');
      expect(similarity.toFixed(3)).equal('0.778');
    });

    it('should return 0.750 for abcd - abe with ro = 0.05', function()
    {
      const jaw = new JaroWinkler();
      const similarity = jaw.similarity('abcd', 'abe', {roCost: 0.05});
      expect(similarity.toFixed(3)).equal('0.750');
    });

    it('should return 0.806 for abcd - abe with ro = 0.15', function()
    {
      const jaw = new JaroWinkler();
      const similarity = jaw.similarity('abcd', 'abe', {roCost: 0.15});
      expect(similarity.toFixed(3)).equal('0.806');
    });

    it('should return 0.861 for abcd - abe with ro = 0.25', function()
    {
      const jaw = new JaroWinkler();
      const similarity = jaw.similarity('abcd', 'abe', {roCost: 0.25});
      expect(similarity.toFixed(3)).equal('0.861');
    });
  });
});

0 comments on commit b3d622d

Please sign in to comment.