
Commit 8f9abda
1-2 ms version. The previous version astonishingly took 50-150 ms to index each doc.
wyhw committed Mar 28, 2014
1 parent 48a9579 commit 8f9abda
Showing 4 changed files with 17 additions and 11 deletions.
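
The speedup comes from removing per-probe buffer copying: before this commit, every dictionary lookup rebuilt a lowercased copy of the entire segment buffer via String.valueOf(...).toLowerCase().toCharArray(), and the CJK segmenter performs such a lookup for roughly every character of input. A minimal before/after sketch of that hot path, using the Dictionary calls visible in this diff (segmentBuff, cursor and the Hit variables are placeholders for the values AnalyzeContext holds):

    // Before: each probe allocates three full copies of the buffer
    // (String.valueOf, toLowerCase, toCharArray) just to check one position.
    Hit before = Dictionary.getSingleton().matchInMainDict(
            String.valueOf(segmentBuff).toLowerCase().toCharArray(), cursor, 1);

    // After: the live buffer is passed straight through; no allocation per probe.
    Hit after = Dictionary.getSingleton().matchInMainDict(segmentBuff, cursor, 1);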
src/main/java/org/wltea/analyzer/core/AnalyzeContext.java (2 changes: 1 addition & 1 deletion)
@@ -42,7 +42,7 @@
 class AnalyzeContext {
 
     // default buffer size
-    private static final int BUFF_SIZE = 4096;
+    private static final int BUFF_SIZE = 16384;
     // critical threshold at which the buffer counts as exhausted
     private static final int BUFF_EXHAUST_CRITICAL = 100;
 
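
The other change in this file, BUFF_SIZE going from 4096 to 16384 characters, works together with the copy removal above: with the old code a larger buffer would have made every full-buffer copy four times as expensive, while with direct matching it simply means fewer refill cycles per document. A rough, hypothetical helper (not project code) for the refill count, ignoring the BUFF_EXHAUST_CRITICAL carry-over:

    // Hypothetical estimate: how often the buffer must be refilled for a
    // document of docChars characters, i.e. ceil(docChars / buffSize).
    static int estimatedRefills(int docChars, int buffSize) {
        return (docChars + buffSize - 1) / buffSize;
    }
    // estimatedRefills(50_000, 4096)  -> 13
    // estimatedRefills(50_000, 16384) -> 4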
src/main/java/org/wltea/analyzer/core/CJKSegmenter.java (4 changes: 2 additions & 2 deletions)
@@ -58,7 +58,7 @@ public void analyze(AnalyzeContext context) {
             // process the queue of pending word-segment hits
             Hit[] tmpArray = this.tmpHits.toArray(new Hit[this.tmpHits.size()]);
             for(Hit hit : tmpArray){
-                hit = Dictionary.getSingleton().matchWithHit(String.valueOf(context.getSegmentBuff()).toLowerCase().toCharArray(), context.getCursor() , hit);
+                hit = Dictionary.getSingleton().matchWithHit(context.getSegmentBuff(), context.getCursor() , hit);
                 if(hit.isMatch()){
                     // emit the current word
                     Lexeme newLexeme = new Lexeme(context.getBufferOffset() , hit.getBegin() , context.getCursor() - hit.getBegin() + 1 , Lexeme.TYPE_CNWORD);
@@ -77,7 +77,7 @@ public void analyze(AnalyzeContext context) {
 
         //*********************************
         // then try a single-character match at the current cursor position
-        Hit singleCharHit = Dictionary.getSingleton().matchInMainDict(String.valueOf(context.getSegmentBuff()).toLowerCase().toCharArray(), context.getCursor(), 1);
+        Hit singleCharHit = Dictionary.getSingleton().matchInMainDict(context.getSegmentBuff(), context.getCursor(), 1);
         if(singleCharHit.isMatch()){ // the single character forms a word on its own
             // emit the current word
             Lexeme newLexeme = new Lexeme(context.getBufferOffset() , context.getCursor() , 1 , Lexeme.TYPE_CNWORD);
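
A behavioral consequence of dropping the toLowerCase() calls: dictionary probes now see the buffer as-is, so Latin letters match the main dictionary case-sensitively unless the input was lowercased earlier in the pipeline. One cheap way to keep case-insensitive lookups without reintroducing per-probe copies would be to fold case in place once per buffer refill; this is only a hypothetical alternative, not something this commit does (charsInBuffer and segmentBuff are placeholders):

    // Hypothetical, not in this commit: lowercase the buffer once, in place,
    // each time it is refilled, instead of copying it on every dictionary probe.
    for (int i = 0; i < charsInBuffer; i++) {
        segmentBuff[i] = Character.toLowerCase(segmentBuff[i]);
    }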
src/main/java/org/wltea/analyzer/dic/Dictionary.java (20 changes: 13 additions & 7 deletions)
@@ -25,14 +25,20 @@
  */
 package org.wltea.analyzer.dic;
 
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.util.Collection;
+import java.util.List;
+
 import org.elasticsearch.common.logging.ESLogger;
 import org.elasticsearch.common.logging.Loggers;
 import org.wltea.analyzer.cfg.Configuration;
 
-import java.io.*;
-import java.util.Collection;
-import java.util.List;
-
 /**
  * Dictionary manager class, singleton pattern
  */
@@ -152,15 +158,15 @@ public Hit matchInMainDict(char[] charArray){
      * @return Hit description of the match result
      */
     public Hit matchInMainDict(char[] charArray , int begin, int length){
-        return singleton._MainDict.match(String.valueOf(charArray).toLowerCase().toCharArray(), begin, length);
+        return singleton._MainDict.match(charArray, begin, length);
     }
 
     /**
      * look up a match in the quantifier dictionary
      * @return Hit description of the match result
     */
     public Hit matchInQuantifierDict(char[] charArray , int begin, int length){
-        return singleton._QuantifierDict.match(String.valueOf(charArray).toLowerCase().toCharArray(), begin, length);
+        return singleton._QuantifierDict.match(charArray, begin, length);
     }
 
 
@@ -179,7 +185,7 @@ public Hit matchWithHit(char[] charArray , int currentIndex , Hit matchedHit){
     * @return boolean
     */
     public boolean isStopWord(char[] charArray , int begin, int length){
-        return singleton._StopWords.match(String.valueOf(charArray).toLowerCase().toCharArray(), begin, length).isMatch();
+        return singleton._StopWords.match(charArray, begin, length).isMatch();
     }
 
     /**
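
To see why the removed pattern dominated indexing time, its cost can be measured in isolation. A standalone sketch (written for illustration, not part of this repository) that exercises only the old allocation pattern, not the IK dictionary:

    import java.util.Arrays;

    public class CopyCostDemo {
        public static void main(String[] args) {
            char[] buff = new char[16384];      // same size as the new BUFF_SIZE
            Arrays.fill(buff, '中');
            int probes = 10_000;                // roughly one probe per input character
            long sink = 0;
            long start = System.nanoTime();
            for (int i = 0; i < probes; i++) {
                // the pattern removed by this commit: full-buffer copy per probe
                char[] copy = String.valueOf(buff).toLowerCase().toCharArray();
                sink += copy.length;            // keep the copy from being optimized away
            }
            long elapsedMs = (System.nanoTime() - start) / 1_000_000;
            System.out.println(probes + " probes copied " + sink + " chars in " + elapsedMs + " ms");
        }
    }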
src/main/java/org/wltea/analyzer/lucene/IKTokenizer.java (2 changes: 1 addition & 1 deletion)
@@ -80,7 +80,7 @@ public boolean incrementToken() throws IOException {
         if(nextLexeme != null){
             // convert the Lexeme into term Attributes
             // set the term text
-            termAtt.append(nextLexeme.getLexemeText().toLowerCase());
+            termAtt.append(nextLexeme.getLexemeText());
             // set the term length
             termAtt.setLength(nextLexeme.getLength());
             // set the term offset
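
With the toLowerCase() removed here as well, tokens are emitted with their original case, consistent with the dictionary side no longer lowercasing. If lowercased terms are still wanted in the index, case folding can happen in a token filter downstream of the tokenizer instead of inside it; a hedged sketch assuming the Lucene 4.x API (ikTokenStream is a placeholder for whatever TokenStream the IK tokenizer produces):

    // Assumes org.apache.lucene.analysis.core.LowerCaseFilter and
    // org.apache.lucene.util.Version from Lucene 4.x; ikTokenStream is a placeholder.
    TokenStream lowercased = new LowerCaseFilter(Version.LUCENE_47, ikTokenStream);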
