Skip to content

Commit

Permalink
性能改进
Browse files: browse the repository at this point in the history
  • Loading branch information
ysc committed May 14, 2015
1 parent fd6352f commit ed3c11b
Showing 1 changed file with 12 additions and 24 deletions.
36 changes: 12 additions & 24 deletions src/main/java/org/apdplat/word/corpus/Bigram.java
Expand Up @@ -24,10 +24,7 @@
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import org.apdplat.word.segmentation.Word; import org.apdplat.word.segmentation.Word;
import org.apdplat.word.util.AutoDetector; import org.apdplat.word.util.*;
import org.apdplat.word.util.GenericTrie;
import org.apdplat.word.util.ResourceLoader;
import org.apdplat.word.util.WordConfTools;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;


Expand All @@ -37,7 +34,7 @@
*/ */
public class Bigram { public class Bigram {
private static final Logger LOGGER = LoggerFactory.getLogger(Bigram.class); private static final Logger LOGGER = LoggerFactory.getLogger(Bigram.class);
private static final GenericTrie<Integer> GENERIC_TRIE = new GenericTrie<>(); private static final DoubleArrayGenericTrie DOUBLE_ARRAY_GENERIC_TRIE = new DoubleArrayGenericTrie(WordConfTools.getInt("bigram.double.array.trie.size", 5300000));
private static int maxFrequency = 0; private static int maxFrequency = 0;
static{ static{
reload(); reload();
Expand All @@ -47,50 +44,41 @@ public static void reload(){


@Override @Override
public void clear() { public void clear() {
GENERIC_TRIE.clear(); DOUBLE_ARRAY_GENERIC_TRIE.clear();
} }


@Override @Override
public void load(List<String> lines) { public void load(List<String> lines) {
LOGGER.info("初始化bigram"); LOGGER.info("初始化bigram");
int count=0; Map<String, Integer> map = new HashMap<>();
for(String line : lines){ for(String line : lines){
try{ try{
addLine(line); addLine(line, map);
count++;
}catch(Exception e){ }catch(Exception e){
LOGGER.error("错误的bigram数据:"+line); LOGGER.error("错误的bigram数据:"+line);
} }
} }
LOGGER.info("bigram初始化完毕,bigram数据条数:" + count); DOUBLE_ARRAY_GENERIC_TRIE.putAll(map);
LOGGER.info("bigram初始化完毕,bigram数据条数:" + map.size());
} }


@Override @Override
public void add(String line) { public void add(String line) {
try{ throw new RuntimeException("not yet support menthod!");
addLine(line);
}catch(Exception e){
LOGGER.error("错误的bigram数据:"+line);
}
} }


private void addLine(String line){ private void addLine(String line, Map<String, Integer> map){
String[] attr = line.split("\\s+"); String[] attr = line.split("\\s+");
int frequency = Integer.parseInt(attr[1]); int frequency = Integer.parseInt(attr[1]);
if(frequency > maxFrequency){ if(frequency > maxFrequency){
maxFrequency = frequency; maxFrequency = frequency;
} }
GENERIC_TRIE.put(attr[0], frequency); map.put(attr[0], frequency);
} }


@Override @Override
public void remove(String line) { public void remove(String line) {
try{ throw new RuntimeException("not yet support menthod!");
String[] attr = line.split("\\s+");
GENERIC_TRIE.remove(attr[0]);
}catch(Exception e){
LOGGER.error("错误的bigram数据:"+line);
}
} }


}, WordConfTools.get("bigram.path", "classpath:bigram.txt")); }, WordConfTools.get("bigram.path", "classpath:bigram.txt"));
Expand Down Expand Up @@ -203,7 +191,7 @@ public static float getScore(String first, String second) {
} }


public static int getFrequency(String first, String second) { public static int getFrequency(String first, String second) {
Integer value = GENERIC_TRIE.get(first+":"+second); Integer value = DOUBLE_ARRAY_GENERIC_TRIE.get(first+":"+second);
if(value == null){ if(value == null){
return 0; return 0;
} }
Expand Down

0 comments on commit ed3c11b

Please sign in to comment.