Skip to content

Commit

Permalink
提取字段重构
Browse files: browse the repository at this point in the history
  • Loading branch information
ysc committed May 25, 2015
1 parent f04e8a5 commit 263aa09
Showing 1 changed file with 13 additions and 11 deletions.
Expand Up @@ -31,6 +31,8 @@
* @author 杨尚川
*/
public class JaroDistanceTextSimilarity extends TextSimilarity {
/**
 * Lower-cased text that is not longer than {@link #longerText}.
 * Overwritten by every call to jaroDistance, which swaps the two fields
 * when the first text turns out to be the longer one.
 * NOTE(review): this is mutable per-instance state written on each call, so
 * concurrent calls on one instance would presumably interfere — confirm
 * whether instances are shared across threads.
 */
protected String shorterText = null;
/**
 * Lower-cased text that is at least as long as {@link #shorterText}.
 * Overwritten by every call to jaroDistance together with shorterText.
 */
protected String longerText = null;
/**
* 计算相似度分值
* @param words1 词列表1
Expand Down Expand Up @@ -58,19 +60,19 @@ protected double scoreImpl(List<Word> words1, List<Word> words2){

private double jaroDistance(String text1, String text2) {
//假设文本1长度更短
String shorter = text1.toLowerCase();
String longer = text2.toLowerCase();
shorterText = text1.toLowerCase();
longerText = text2.toLowerCase();
//如果假设不成立则交换变量的值
if (shorter.length() > longer.length()) {
String temp = shorter;
shorter = longer;
longer = temp;
if (shorterText.length() > longerText.length()) {
String temp = shorterText;
shorterText = longerText;
longerText = temp;
}
//字符交集窗口大小
int windowLength = (shorter.length() / 2) - 1;
int windowLength = (shorterText.length() / 2) - 1;
//求字符交集,m1可能会不等于m2
String m1 = getCharacterConjunction(shorter, longer, windowLength);
String m2 = getCharacterConjunction(longer, shorter, windowLength);
String m1 = getCharacterConjunction(shorterText, longerText, windowLength);
String m2 = getCharacterConjunction(longerText, shorterText, windowLength);
//一种或两种情况没有字符交集,完全不相关,相似度分值为0
if (m1.length() == 0 || m2.length() == 0) {
return 0.0;
Expand All @@ -88,8 +90,8 @@ private double jaroDistance(String text1, String text2) {
//t is half the number of transpositions
int t = transpositions/2;;
//计算距离(这里的距离也就是相似度分值了)
double distance = ( m / (double)shorter.length() +
m / (double)longer.length() +
double distance = ( m / (double)shorterText.length() +
m / (double)longerText.length() +
(m - t) / (double)m ) / 3.0;
return distance;
}
Expand Down

0 comments on commit 263aa09

Please sign in to comment.