Skip to content

Commit

Permalink
自主学习进行PCA识别,自动处理词库优化.20190427
Browse files Browse the repository at this point in the history
  • Loading branch information
yaoguangluo committed Apr 27, 2019
1 parent 5c6f0eb commit 1feb189
Show file tree
Hide file tree
Showing 6 changed files with 48 additions and 2 deletions.
1 change: 1 addition & 0 deletions wordSegment/org/tinos/engine/analysis/Analyzer.java
Expand Up @@ -26,4 +26,5 @@ public interface Analyzer {
List<String> parserMixedString(String mixedString);
Map<String,WordFrequency> parserMixStringByReturnFrequencyMap(String key);
void studyNewWord(String study, String token, String posStudy);
Map<String, String> getStudyPos();
}
5 changes: 5 additions & 0 deletions wordSegment/org/tinos/engine/analysis/imp/AnalyzerImp.java
Expand Up @@ -462,4 +462,9 @@ public void studyNewWord(String study, String token, String posStudy) {
//learn new pos
fHMMList.studyNewPos(study+token, posStudy);
}

/**
 * Exposes the study-pos map of the underlying {@code fHMMList}.
 *
 * <p>This is a pure pass-through: no copying, filtering, or null handling
 * is applied to the delegate's result.
 *
 * @return whatever {@code fHMMList.getStudyPos()} returns
 */
@Override
public Map<String, String> getStudyPos() {
    final Map<String, String> studyPosFromList = fHMMList.getStudyPos();
    return studyPosFromList;
}
}
3 changes: 2 additions & 1 deletion wordSegment/org/tinos/ortho/fhmm/FHMMList.java
Expand Up @@ -44,5 +44,6 @@ public interface FHMMList {
Map<String, String> getFullPositive();
List<String> englishStringToWordsList(String string);
Map<Long, Map<String, String>> getWordsForests();
void studyNewPos(String string, String posStudy);
void studyNewPos(String string, String posStudy);
Map<String, String> getStudyPos();
}
5 changes: 5 additions & 0 deletions wordSegment/org/tinos/ortho/fhmm/imp/FMHMMListImp.java
Expand Up @@ -276,4 +276,9 @@ public void studyNewPos(String string, String posStudy) {
// TODO Auto-generated method stub

}

/**
 * Returns the study-pos map for this implementation.
 *
 * <p>This method has no backing data to hand out, so it returns an
 * immutable empty map instead of {@code null}. Callers can read or iterate
 * the result without a null check; attempts to modify it fail with
 * {@link UnsupportedOperationException}.
 *
 * @return an empty, unmodifiable map; never {@code null}
 */
@Override
public Map<String, String> getStudyPos() {
    // Fully qualified so the file's import block does not need to change.
    return java.util.Collections.<String, String>emptyMap();
}
}
9 changes: 9 additions & 0 deletions wordSegment/org/tinos/ortho/fhmm/imp/FMHMMListOneTimeImp.java
Expand Up @@ -19,6 +19,7 @@
//I will build a collection class for managing this maps. at the next version.
@SuppressWarnings("unchecked")
public class FMHMMListOneTimeImp implements FHMMList {
private Map<String, String> studyPos;
private Map<String, String> posCnToCn;
private Map<String, String> posEnToEn;
private Map<String, String> posEnToCn;
Expand Down Expand Up @@ -75,6 +76,7 @@ public Map<Long, FMHMMNode>[] getMaps() {
}

public void indexMixed() throws IOException {
studyPos= new ConcurrentHashMap<>();
posCnToCn= new ConcurrentHashMap<>();
linkedHashMap= new ConcurrentHashMap<>();
listCn= new CopyOnWriteArrayList<>();
Expand Down Expand Up @@ -477,6 +479,8 @@ public void indexMixed() throws IOException {
.split(StableData.NLP_SYMBO_SLASH)[StableData.INT_ONE]);
}
}
studyPos.put(cInputString.split(StableData.NLP_SYMBO_SLASH)[StableData.INT_ZERO], cInputString
.split(StableData.NLP_SYMBO_SLASH)[StableData.INT_ONE]);
posCnToCn.put(cInputString.split(StableData.NLP_SYMBO_SLASH)[StableData.INT_ZERO], cInputString
.split(StableData.NLP_SYMBO_SLASH)[StableData.INT_ONE]);
linkedHashMap = loopLoadForest(cInputString);
Expand Down Expand Up @@ -1058,4 +1062,9 @@ public Map<Long, Map<String, String>> getWordsForests() {
public void studyNewPos(String string, String posStudy) {
    // Stores posStudy under the key `string` in posCnToCn, replacing any
    // existing entry for that key; the previous value returned by put is ignored.
    // NOTE(review): only posCnToCn is written here. The studyPos map returned
    // by getStudyPos() is not updated by this method - confirm that is intended.
    this.posCnToCn.put(string, posStudy);
}

/**
 * Gives access to the study-pos map held by this instance.
 *
 * <p>The returned object is the {@code studyPos} field itself, not a copy,
 * so modifications made through it are visible to this instance.
 * The field is assigned inside {@code indexMixed()}.
 * NOTE(review): it appears to be {@code null} until {@code indexMixed()} has
 * run - confirm no other initializer exists outside this view.
 *
 * @return the current {@code studyPos} field reference
 */
@Override
public Map<String, String> getStudyPos() {
    return studyPos;
}
}
27 changes: 26 additions & 1 deletion wordSegment/org/tinos/ortho/fhmm/imp/poscc.lyg
Expand Up @@ -26373,4 +26373,29 @@ z/字母
  /段落标识标点
同桌/名词代词
睡梦/名词
反差/名词
反差/名词
相传/动词
瑰宝/名词
盾盾/形谓词
又怎/副词
去接/动词
力棒/名词
州娃/名词
食疗/名词
”,/标点
潮头/名词
带动/动词
也是/副词
看到/动词
,“/标点
流变/动词
带来/动词
惊喜/形谓词
启超/动词
:“/标点
。”/标点
担负/动词
负起/动词
传到/动词
去关/动词
血脉/名词

0 comments on commit 1feb189

Please sign in to comment.