Skip to content

Commit

Permalink
一直在优化5.1.0
Browse files Browse the repository at this point in the history
  • Loading branch information
yaoguangluo committed Oct 14, 2018
1 parent c61771a commit 7378b8d
Show file tree
Hide file tree
Showing 10 changed files with 202 additions and 36 deletions.
4 changes: 4 additions & 0 deletions main/src/org/tinos/engine/analysis/Analyzer.java
Expand Up @@ -4,10 +4,14 @@
import java.util.List;
import java.util.Map;

import org.tinos.view.obj.WordFrequency;

/**
 * Entry-point contract for the text analyzers in this package.
 *
 * <p>Callers (see DemoEX) call {@link #init()} first, then {@link #parserString(String)} on
 * the input text, and feed the resulting list to {@link #getWordFrequency(List)}.
 */
public interface Analyzer {
/**
 * Prepares the analyzer for use.
 *
 * @throws IOException if preparing the analyzer fails on I/O
 */
void init() throws IOException;

/**
 * Analyzes {@code input} and returns the resulting list of strings
 * (presumably the segmented words, in input order — confirm against the implementation).
 *
 * @param input the text to analyze
 * @return the strings produced for {@code input}
 */
List<String> parserString(String input);

/**
 * Returns the analyzer's word map.
 * NOTE(review): DemoEX prints the value after a "/" under a part-of-speech heading, so the
 * map is presumably word -> part-of-speech tag — confirm.
 *
 * @return the word map
 * @throws IOException if the map cannot be obtained
 */
Map<String, String> getWord() throws IOException;

/**
 * Computes per-word occurrence counts for the given strings.
 *
 * @param sets the strings to count, typically the output of {@link #parserString(String)}
 * @return one {@link WordFrequency} per counted word
 * @throws IOException if counting fails on I/O
 */
List<WordFrequency> getWordFrequency(List<String> sets) throws IOException;
}
10 changes: 7 additions & 3 deletions main/src/org/tinos/engine/analysis/CogsBinaryForestAnalyzer.java
@@ -1,6 +1,10 @@
package org.tinos.engine.analysis;

public interface CogsBinaryForestAnalyzer extends BinaryForestAnalyzer {
}
import java.io.IOException;
import java.util.List;

import org.tinos.view.obj.WordFrequency;


/**
 * Analyzer contract that extends {@link BinaryForestAnalyzer} with word-frequency counting.
 */
public interface CogsBinaryForestAnalyzer extends BinaryForestAnalyzer {
/**
 * Computes per-word occurrence counts for the given strings.
 * NOTE(review): Analyzer declares the same signature; if BinaryForestAnalyzer already
 * extends Analyzer this redeclaration is redundant — confirm.
 *
 * @param sets the strings to count
 * @return one {@link WordFrequency} per counted word
 * @throws IOException if counting fails on I/O
 */
List<WordFrequency> getWordFrequency(List<String> sets) throws IOException;
}
Expand Up @@ -2,11 +2,13 @@

import java.util.List;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.LinkedList;
import java.io.IOException;

import org.tinos.ortho.fhmm.FHMMList;
import org.tinos.ortho.fhmm.imp.FMHMMListImp;
import org.tinos.view.obj.WordFrequency;
import org.tinos.view.stable.StableData;
import org.tinos.engine.nero.NEROController;
import org.tinos.engine.nero.imp.NEROControllerImp;
Expand All @@ -15,19 +17,23 @@
import org.tinos.engine.pos.POSController;
import org.tinos.engine.pos.imp.POSControllerImp;
import org.tinos.engine.analysis.CogsBinaryForestAnalyzer;
import org.tinos.engine.linerScience.Quick6DLuoYaoguangSort;
import org.tinos.engine.linerScience.imp.Quick6DLuoYaoguangSortImp;

public class CogsBinaryForestAnalyzerImp implements CogsBinaryForestAnalyzer {
private FHMMList fHMMList;
private NEROController neroController;
private NLPController nlpController;
private POSController posController;
private Quick6DLuoYaoguangSort quick6DLuoYaoguangSort;

/**
 * Creates the collaborators this analyzer delegates to.
 *
 * <p>The word list is created and indexed first; the NERO, NLP and POS controllers and the
 * frequency sorter are only created once indexing has returned normally.
 *
 * @throws IOException propagated from the construction or indexing steps
 */
public void init() throws IOException {
    this.fHMMList = new FMHMMListImp();
    this.fHMMList.index();

    this.neroController = new NEROControllerImp();
    this.nlpController = new NLPControllerImp();
    this.posController = new POSControllerImp();

    this.quick6DLuoYaoguangSort = new Quick6DLuoYaoguangSortImp();
}

@SuppressWarnings(StableData.RAW_TYPES)
Expand Down Expand Up @@ -71,14 +77,13 @@ public List<String> parserString(String inputString) {
outputList.add(countWordNode);
fixWords[StableData.INT_ZERO].delete(StableData.INT_ZERO, fixWords[StableData.INT_ZERO].length());
fixWords[StableData.INT_ZERO].append(countWordNode);
continue Here;
}
}
return outputList;
}

private void addFixWords(int charPosition, String inputString, StringBuilder[] fixWords) {
fixWords[StableData.INT_ONE].delete(0, fixWords[StableData.INT_ONE].length());
fixWords[StableData.INT_ONE].delete(StableData.INT_ZERO, fixWords[StableData.INT_ONE].length());
if (charPosition + StableData.INT_EIGHT < inputString.length()) {
fixWords[StableData.INT_ONE].append(inputString.substring(charPosition + StableData.INT_THREE
, charPosition + StableData.INT_EIGHT));
Expand All @@ -91,4 +96,24 @@ private void addFixWords(int charPosition, String inputString, StringBuilder[] f
/**
 * Exposes the word map held by the underlying {@code FHMMList}.
 *
 * @return whatever {@code fHMMList.getWords()} returns; {@link #init()} must have run first,
 *         otherwise {@code fHMMList} is still {@code null}
 */
public Map<String, String> getWord() {
    final Map<String, String> words = this.fHMMList.getWords();
    return words;
}

/**
 * Counts how often each distinct string occurs in {@code sets} and returns the counts as a
 * list ordered by the configured {@code Quick6DLuoYaoguangSort}.
 *
 * @param sets the strings to count; must not contain {@code null} because the counts are
 *             accumulated in a {@link ConcurrentHashMap}
 * @return one {@link WordFrequency} per distinct string, sorted in place by the sorter
 * @throws IOException declared by the interface; nothing in this body visibly throws it
 */
public List<WordFrequency> getWordFrequency(List<String> sets) throws IOException {
    Map<String, WordFrequency> frequencyByWord = new ConcurrentHashMap<>();
    for (String word : sets) {
        WordFrequency entry = frequencyByWord.get(word);
        if (entry == null) {
            // First sighting: start the count at one.
            entry = new WordFrequency();
            entry.setFrequency(StableData.INT_ONE);
            entry.setWord(word);
            frequencyByWord.put(word, entry);
        } else {
            // The map already holds this instance, so bumping it in place is enough.
            entry.setFrequency(entry.getFrequency() + StableData.INT_ONE);
        }
    }

    List<WordFrequency> sorted = quick6DLuoYaoguangSort.frequencyWordMapToList(frequencyByWord);
    int lastIndex = sorted.size() - StableData.INT_ONE;
    quick6DLuoYaoguangSort.quick6DLuoYaoGuangSortWordFrequency(sorted, StableData.INT_ZERO, lastIndex);
    return sorted;
}
}
15 changes: 15 additions & 0 deletions main/src/org/tinos/engine/linerScience/Quick6DLuoYaoguangSort.java
@@ -0,0 +1,15 @@
package org.tinos.engine.linerScience;

import java.util.List;
import java.util.Map;

import org.tinos.view.obj.WordFrequency;

/**
 * Sorting helpers for lists of {@link WordFrequency}.
 */
public interface Quick6DLuoYaoguangSort {

/**
 * Sorts the slice {@code list[leftPosition..rightPosition]} in place by frequency.
 * Callers pass {@code 0} and {@code list.size() - 1} to sort the whole list.
 *
 * @param list          the list to reorder
 * @param leftPosition  index of the first element of the slice (inclusive)
 * @param rightPosition index of the last element of the slice (inclusive)
 */
void quick6DLuoYaoGuangSortWordFrequency(List<WordFrequency> list, int leftPosition, int rightPosition);

/**
 * Partitions the slice {@code list[leftPosition..rightPosition]} around a pivot.
 *
 * @param list          the list to reorder
 * @param leftPosition  index of the first element of the slice (inclusive)
 * @param rightPosition index of the last element of the slice (inclusive)
 * @return the index at which the pivot ends up
 */
int partition(List<WordFrequency> list, int leftPosition, int rightPosition);

/**
 * Collects the values of {@code map} into a new list.
 *
 * @param map word -> frequency record
 * @return a list holding the map's values
 */
List<WordFrequency> frequencyWordMapToList(Map<String, WordFrequency> map);
}
@@ -0,0 +1,70 @@
package org.tinos.engine.linerScience.imp;

import java.util.ArrayList;
import java.util.Iterator;

import java.util.List;
import java.util.Map;

import org.tinos.view.obj.WordFrequency;
import org.tinos.view.stable.StableData;
import org.tinos.engine.linerScience.Quick6DLuoYaoguangSort;

public class Quick6DLuoYaoguangSortImp implements Quick6DLuoYaoguangSort {
public void quick6DLuoYaoGuangSortWordFrequency(List<WordFrequency> list, int leftPosition, int rightPosition) {
if (leftPosition < rightPosition) {
int c = rightPosition - leftPosition + StableData.INT_ONE;
if (c < StableData.INT_SEVEN) {
int j;
for (int i = StableData.INT_ONE + leftPosition; i < leftPosition + c; i++) {
j = i;
while (j >= StableData.INT_ONE + leftPosition) {
if (list.get(j).getFrequency() < list.get(j - StableData.INT_ONE).getFrequency()) {
WordFrequency wordFrequency = list.get(j);
list.set(j, list.get(j - StableData.INT_ONE));
list.set(j - StableData.INT_ONE, wordFrequency);
}
j--;
}
}
} else {
int pos = partition(list, leftPosition, rightPosition);
quick6DLuoYaoGuangSortWordFrequency(list, leftPosition, pos - StableData.INT_ONE);
quick6DLuoYaoGuangSortWordFrequency(list, pos + StableData.INT_ONE, rightPosition);
}
}
}

public int partition(List<WordFrequency> list, int leftPosition, int rightPosition) {
int rightPositionNew = rightPosition;
int leftPositionNew = leftPosition;
WordFrequency wordFrequencyX = list.get(leftPosition);
WordFrequency wordFrequencyY = list.get(rightPosition);
if (wordFrequencyX.getFrequency() <= wordFrequencyY.getFrequency()) {
wordFrequencyY = wordFrequencyX;
}
while (leftPositionNew < rightPositionNew) {
while ((list.get(leftPositionNew).getFrequency() <= wordFrequencyY.getFrequency())
&& (leftPositionNew < rightPositionNew)) leftPositionNew++;
while (list.get(rightPositionNew).getFrequency() > wordFrequencyY.getFrequency()) rightPositionNew--;
if (leftPositionNew < rightPositionNew) {
WordFrequency wordFrequency = list.get(rightPositionNew);
list.set(rightPositionNew, list.get(leftPositionNew));
list.set(leftPositionNew, wordFrequency);
}
}
list.set(leftPosition, list.get(rightPositionNew));
list.set(rightPositionNew, wordFrequencyY);
return rightPositionNew;
}

@SuppressWarnings(StableData.RAW_TYPES)
public List<WordFrequency> frequencyWordMapToList(Map<String, WordFrequency> map) {
List<WordFrequency> list = new ArrayList<>();
Iterator iterator = map.keySet().iterator();
while (iterator.hasNext()) {
list.add(map.get(iterator.next()));
}
return list;
}
}
7 changes: 4 additions & 3 deletions main/src/org/tinos/ortho/fhmm/imp/words.lyg
Expand Up @@ -10550,6 +10550,9 @@ $/标点
手球/名词
围/动词
用电子/名词
杨过/名词
小龙女/名词
龙女/名词
细胞/名词
药/名词
效/名词
Expand Down Expand Up @@ -12405,9 +12408,7 @@ $/标点
谋/动词
漫/形谓词
叭/名词
杨过/名词
小龙女/名词
重逢/动词
诺贝尔/名词
戈/人名
激发病/形谓词
提示无/动词
Expand Down
51 changes: 24 additions & 27 deletions main/src/org/tinos/test/DemoEX.java
Expand Up @@ -7,60 +7,57 @@

import org.tinos.engine.analysis.Analyzer;
import org.tinos.engine.analysis.imp.CogsBinaryForestAnalyzerImp;
import org.tinos.view.obj.WordFrequency;
import timeProcessor.TimeCheck;

/**
 * Command-line demo: segments a sample sentence repeatedly for timing, prints the tokens,
 * prints each token with its entry from the analyzer's word map, then prints word counts.
 *
 * NOTE(review): this listing appears to interleave removed and added lines of a diff
 * (for example {@code ss} is declared more than once and {@code i} is redeclared inside a
 * loop that already declares it), so it will not compile exactly as shown — confirm against
 * the real file before editing the code.
 */
@SuppressWarnings("unused")
public class DemoEX {
/**
 * Runs the demo.
 *
 * @param args ignored
 * @throws IOException propagated from the analyzer calls
 */
@SuppressWarnings({"unchecked", "rawtypes"})
public static void main(String[] args) throws IOException {
Analyzer analyzer = new CogsBinaryForestAnalyzerImp();
analyzer.init();
// Both maps below come from the same getWord() call; they look like the old and new name of one variable.
Map<String, String> nlp = analyzer.getWord();
Map<String, String> pos = analyzer.getWord();
List<String> sets = new ArrayList<>();
// presumably a stopwatch helper (begin / end / duration) — TODO confirm in timeProcessor.TimeCheck
TimeCheck t = new TimeCheck();
String ss = "结婚的和尚未结婚的等和尚未成家之人都和尚未"
+ "成佛的和尚未必一样和尚未来的和尚未和从容"
+ "易开始念经那和尚未进行告别不显得从容易知"
+ "和尚未结婚的施主一样其实都不和尚未成佛的"
+ "心态有关因为这和尚未成佛虎头虎脑的虎头虎脑人";
// Alternative sample input: String ss = "杨过和小龙女重逢了";
t.begin();
String a = new String();
// Alternative sample input: String ss = "和尚未出家前这个和尚未和尚未成家之人组成家庭过,各位和尚们不要怪这个和尚";

for (int i = 0; i < 100000; i++) { // repeat 400,000 times, equivalent to processing 10 million characters (NOTE(review): the bound here is 100000, so this count looks stale)
String ss = "科学的发展是一种传承,每一个获得诺贝尔奖的科学家,都是通过长时间对问题的优化中不断总结和分化" +
",最终得到科学的成果";
t.begin();
for (int i = 0; i < 1000000; i++) { // repeat 1,000,000 times, equivalent to processing roughly 57 million characters
sets = analyzer.parserString(ss);// part-of-speech analysis
}
// for (int i = 0; i < 100000000; i++) { // repeat 400,000 times, equivalent to processing 10 million characters
// // sets = analyzer.parserString(ss);// part-of-speech analysis
//// a="sas";
//// a="";
// }
t.end();


// Append-then-delete loop on a StringBuilder; it does not touch the analyzer.
StringBuilder a2 = new StringBuilder();
for (int i = 0; i < 100000000; i++) { // repeat 400,000 times, equivalent to processing 10 million characters
// sets = analyzer.parserString(ss);// part-of-speech analysis
a2.append("sas");
a2.delete(0, 3);
}


// Print the non-empty tokens from the last parserString call.
System.out.print("分析处理真实结果-->");
for (int i = 0; i < sets.size(); i++) {
if (!sets.get(i).equals("")) {
System.out.print(sets.get(i) + " ");
}
}
System.out.println("");
t.duration();
System.out.println("");
System.out.println("词性分析-->");
t.begin();
// Look up every non-blank token in the word map and print "token/value".
for (int j = 0; j < 1; j++) {
for (int i = 0; i < sets.size(); i++) {
if (!sets.get(i).replaceAll("\\s+", "").equals("")) {
nlp.get(sets.get(i));
System.out.println(sets.get(i) + "/" + nlp.get(sets.get(i)) + " ");
System.out.print(sets.get(i) + "/" + pos.get(sets.get(i)) + "----");
}
}
}
t.end();
System.out.println("");
t.duration();
System.out.println("");
System.out.println("词频分析-->");
t.begin();
List<WordFrequency> fwa = analyzer.getWordFrequency(sets);
t.end();
// Walk the result from last to first; presumably the list is sorted ascending, so this prints the most frequent words first — confirm.
for (int i = fwa.size() - 1; i >= 0; i--) {
System.out.print(fwa.get(i).getWord() + ":" + fwa.get(i).getFrequency() + "----");
}
System.out.println("");
t.duration();
}
}
2 changes: 1 addition & 1 deletion main/src/org/tinos/test/DemoPOS.java
Expand Up @@ -57,7 +57,7 @@ public static void main(String[] args) throws IOException {
+ " 心态 有关 因为 这 和尚 未 成佛";
ss1[1] = "沿 海 南 方向 逃跑";
ss1[2] = "他 说 的 确实 在理";
ss1[3] = "杨过 和 小龙女 重逢 了";
ss1[3] = "杨过 和 小龙女 离婚 了";
ss1[4] = "结婚 的 和 尚未 结婚 的";
ss1[5] = "提高 产品 质量";
ss1[6] = "中外 科学 名著";
Expand Down
49 changes: 49 additions & 0 deletions main/src/org/tinos/view/obj/WordFrequency.java
@@ -0,0 +1,49 @@
package org.tinos.view.obj;

/**
 * Mutable record pairing a word with its occurrence count, an optional tag string and two
 * links to other {@code WordFrequency} instances.
 *
 * <p>All fields start at their Java defaults ({@code null} / {@code 0}) and are only changed
 * through the setters below.
 */
public class WordFrequency {
    /** The word being counted. */
    private String word;
    /** Tag string for the word (presumably its part of speech, going by the name). */
    private String POS;
    /** Number of occurrences recorded for {@link #word}. */
    private int frequency;
    /** Link to another record; NOTE(review): likely a tree/list neighbour — confirm usage. */
    private WordFrequency left;
    /** Link to another record; NOTE(review): likely a tree/list neighbour — confirm usage. */
    private WordFrequency right;

    /** @return the word, or {@code null} if none has been set */
    public String getWord() {
        return this.word;
    }

    /** @param word the word to store */
    public void setWord(String word) {
        this.word = word;
    }

    /** @return the tag string, or {@code null} if none has been set */
    public String getPOS() {
        return this.POS;
    }

    /** @param POS the tag string to store */
    public void setPOS(String POS) {
        this.POS = POS;
    }

    /** @return the stored occurrence count ({@code 0} until set) */
    public int getFrequency() {
        return this.frequency;
    }

    /** @param frequency the occurrence count to store */
    public void setFrequency(int frequency) {
        this.frequency = frequency;
    }

    /** @return the left link, or {@code null} if none has been set */
    public WordFrequency getLeft() {
        return this.left;
    }

    /** @param left the record to store as the left link */
    public void setLeft(WordFrequency left) {
        this.left = left;
    }

    /** @return the right link, or {@code null} if none has been set */
    public WordFrequency getRight() {
        return this.right;
    }

    /** @param right the record to store as the right link */
    public void setRight(WordFrequency right) {
        this.right = right;
    }
}
1 change: 1 addition & 0 deletions main/src/org/tinos/view/stable/StableData.java
Expand Up @@ -22,6 +22,7 @@ public class StableData {
public static final int INT_THREE = 3;
public static final int INT_FOUR = 4;
public static final int INT_SIX = 6;
public static final int INT_SEVEN = 7;
public static final int INT_TEN = 10;
public static final int INT_EIGHT = 8;
public static final int INT_NINE = 9;
Expand Down

0 comments on commit 7378b8d

Please sign in to comment.