Skip to content

Commit

Permalink
提取字段重构
Browse files Browse the repository at this point in the history
  • Loading branch information
ysc committed May 25, 2015
1 parent f04e8a5 commit 263aa09
Showing 1 changed file with 13 additions and 11 deletions.
Expand Up @@ -31,6 +31,8 @@
* @author 杨尚川 * @author 杨尚川
*/ */
public class JaroDistanceTextSimilarity extends TextSimilarity { public class JaroDistanceTextSimilarity extends TextSimilarity {
protected String shorterText = null;
protected String longerText = null;
/** /**
* 计算相似度分值 * 计算相似度分值
* @param words1 词列表1 * @param words1 词列表1
Expand Down Expand Up @@ -58,19 +60,19 @@ protected double scoreImpl(List<Word> words1, List<Word> words2){


private double jaroDistance(String text1, String text2) { private double jaroDistance(String text1, String text2) {
//假设文本1长度更短 //假设文本1长度更短
String shorter = text1.toLowerCase(); shorterText = text1.toLowerCase();
String longer = text2.toLowerCase(); longerText = text2.toLowerCase();
//如果假设不成立则交换变量的值 //如果假设不成立则交换变量的值
if (shorter.length() > longer.length()) { if (shorterText.length() > longerText.length()) {
String temp = shorter; String temp = shorterText;
shorter = longer; shorterText = longerText;
longer = temp; longerText = temp;
} }
//字符交集窗口大小 //字符交集窗口大小
int windowLength = (shorter.length() / 2) - 1; int windowLength = (shorterText.length() / 2) - 1;
//求字符交集,m1可能会不等于m2 //求字符交集,m1可能会不等于m2
String m1 = getCharacterConjunction(shorter, longer, windowLength); String m1 = getCharacterConjunction(shorterText, longerText, windowLength);
String m2 = getCharacterConjunction(longer, shorter, windowLength); String m2 = getCharacterConjunction(longerText, shorterText, windowLength);
//一种或两种情况没有字符交集,完全不相关,相似度分值为0 //一种或两种情况没有字符交集,完全不相关,相似度分值为0
if (m1.length() == 0 || m2.length() == 0) { if (m1.length() == 0 || m2.length() == 0) {
return 0.0; return 0.0;
Expand All @@ -88,8 +90,8 @@ private double jaroDistance(String text1, String text2) {
//t is half the number of transpositions //t is half the number of transpositions
int t = transpositions/2;; int t = transpositions/2;;
//计算距离(这里的距离也就是相似度分值了) //计算距离(这里的距离也就是相似度分值了)
double distance = ( m / (double)shorter.length() + double distance = ( m / (double)shorterText.length() +
m / (double)longer.length() + m / (double)longerText.length() +
(m - t) / (double)m ) / 3.0; (m - t) / (double)m ) / 3.0;
return distance; return distance;
} }
Expand Down

0 comments on commit 263aa09

Please sign in to comment.