Skip to content

Commit

Permalink
[#8] Started work on prioritizing words with higher counts
Browse files Browse the repository at this point in the history
  • Loading branch information
Frederik Kammel committed Sep 12, 2016
1 parent 2b415cb commit 63c3357
Show file tree
Hide file tree
Showing 4 changed files with 133 additions and 17 deletions.
23 changes: 13 additions & 10 deletions src/main/java/algorithm/HangmanSolver.java
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ public static Result solve(String currentSequence, Language lang) {
// Go through all words
for (String word : words) {
// Get all words from the database with equal length
List<String> wordsWithEqualLength = database.getValuesWithLength(2, word.length());
List<Word> wordsWithEqualLength = database.getValuesWithLength(2, 3, word.length());

// Check if there are words that match 90%
String bestWord = database.getValueWithHighestCorrelation(2, word, proposedSolutions);
Expand Down Expand Up @@ -147,18 +147,18 @@ private static void loadLanguageDatabases(Language lang) {
/**
* Returns the most frequent char in the given word list.
*
* @param words
* @param wordsWithEqualLength
* The list for which the most frequent char will be determined.
* @return The most frequent char.
*/
private static char getMostFrequentChar(List<String> words) {
return getMostFrequentChar(words, new char[0]);
private static char getMostFrequentChar(List<Word> wordsWithEqualLength) {
return getMostFrequentChar(wordsWithEqualLength, new char[0]);
}

/**
* Returns the most frequent char in the given word list.
*
* @param words
* @param wordsWithEqualLength
* The list for which the most frequent char will be determined.
* @param priorityChars
* The ranking of most frequent chars will be filtered so that it
Expand All @@ -167,7 +167,7 @@ private static char getMostFrequentChar(List<String> words) {
* the method acts like {@code getMostFrequentChar(wordsWithEqualLength)}
* @return The most frequent char.
*/
private static char getMostFrequentChar(List<String> words, char[] priorityChars) {
private static char getMostFrequentChar(List<Word> wordsWithEqualLength, char[] priorityChars) {
ArrayList<Thread> threads = new ArrayList<Thread>();
AtomicInteger currentIndex = new AtomicInteger(0);
List<CustomAtomicInteger> charCounts = new ArrayList<CustomAtomicInteger>();
Expand All @@ -182,15 +182,15 @@ private static char getMostFrequentChar(List<String> words, char[] priorityChars
// ArrayList<AtomicInteger>(Collections.nCopies(Character.MAX_VALUE, new
// AtomicInteger(0)));

log.getLogger().info("Dictionary size: " + words.size());
log.getLogger().info("Dictionary size: " + wordsWithEqualLength.size());
System.out.println("Counting...");
for (int i = 0; i < Config.getParallelThreadCount(); i++) {
threads.add(new Thread() {
@Override
public void run() {
int index = currentIndex.getAndIncrement();
while (index < words.size()) {
countAllCharsInString(words.get(index), charCounts);
while (index < wordsWithEqualLength.size()) {
countAllCharsInString(wordsWithEqualLength.get(index).getWord(), charCounts);

// Grab the next index
index = currentIndex.getAndIncrement();
Expand Down Expand Up @@ -295,7 +295,10 @@ private static boolean charArrayContais(char[] array, char value) {
* multithreading-context, the List specified as
* {@code countList} must contain
* {@link CustomAtomicInteger}s.<br>
* <b>Because of a better performance, this method uses a given list and modifies it rather than creating a new one that has to be summed up to existing ones in another slow {@code for}-loop</b>
* <b>Because of a better performance, this method uses a given
* list and modifies it rather than creating a new one that has
* to be summed up to existing ones in another slow
* {@code for}-loop</b>
*/
private static void countAllCharsInString(String str, List<CustomAtomicInteger> countList) {
for (char chr : str.toCharArray()) {
Expand Down
67 changes: 67 additions & 0 deletions src/main/java/algorithm/Word.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
package algorithm;

/**
* A class that contains a {@link String} and a count how often users used this
* word already. This allows the algorithm to prioritize words.
*
* @author frede
*
*/
public class Word {

private String word;
private int count;

public Word() {
this("");
}

public Word(String word) {
this(word, 0);
}

public Word(String word, int count) {
this.setWord(word);
this.setCount(count);
}

/**
* @return the word
*/
public String getWord() {
return word;
}

/**
* @param word
* the word to set
*/
public void setWord(String word) {
this.word = word;
}

/**
* @return the count
*/
public int getCount() {
return count;
}

/**
* @param count
* the count to set
*/
public void setCount(int count) {
this.count = count;
}

@Override
public boolean equals(Object o) {
if (o instanceof Word) {
return (this.getWord().equals(((Word) o).getWord()));
} else {
return false;
}
}

}
51 changes: 46 additions & 5 deletions src/main/java/languages/TabFile.java
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
import org.apache.commons.io.FileUtils;

import algorithm.HangmanSolver;
import algorithm.Word;
import common.*;

public class TabFile {
Expand Down Expand Up @@ -264,13 +265,49 @@ public List<Integer> indexOf(String valueToFind, int columnIndex) {
return res;
}

/**
 * Writes {@code newValue} into every cell addressed by
 * {@code columnsAndRows}, overwriting whatever the *.tab-file held there.
 *
 * @param newValue
 *            The new value of the given cells
 * @param columnsAndRows
 *            One list of row indexes per column: the list at position
 *            {@code c} names the rows to overwrite in column {@code c}.
 *            See the return value of {@link #indexOf(String)} to see how
 *            the list needs to be built up.
 * @see #indexOf(String)
 */
public void setValueAt(String newValue, List<List<Integer>> columnsAndRows) {
    // The position of each row list inside the outer list is its column index
    int column = 0;
    for (List<Integer> rowsOfColumn : columnsAndRows) {
        setValueAt(newValue, rowsOfColumn, column);
        column++;
    }
}

/**
 * Writes {@code newValue} into several cells of a single column,
 * overwriting whatever the *.tab-file held there.
 *
 * @param newValue
 *            The new value of the given cells
 * @param rows
 *            The row indexes of the cells to overwrite
 * @param column
 *            The column all of those cells are in
 * @see #indexOf(String, int)
 */
public void setValueAt(String newValue, List<Integer> rows, int column) {
    for (int i = 0; i < rows.size(); i++) {
        // Unbox explicitly so the single-cell (String, int, int) overload is called
        int row = rows.get(i);
        setValueAt(newValue, row, column);
    }
}

/**
* Replaces the old value at the given position in the *.tab-file with the
* new Value. This method cannot add rows to the *.tab-file. To add rows,
* use {@link #addRow}
*
* @param newValue
* Thenew value of the given cell
* The new value of the given cell
* @param row
* The row of the cell to be replaced.
* @param column
Expand Down Expand Up @@ -301,12 +338,16 @@ public void addRow(String[] newValues) {
* @return A {@link List} with all values in the specified column that have
* the specified length.
*/
public List<String> getValuesWithLength(int column, int length) {
List<String> res = new ArrayList<String>();
public List<Word> getValuesWithLength(int column, int countColumn, int length) {
List<Word> res = new ArrayList<Word>();

for (int i = 0; i < this.getRowCount(); i++) {
if (this.getValueAt(i, column).length() == length) {
res.add(this.getValueAt(i, column));
if (!this.getValueAt(i, countColumn).replaceAll(" ", "").equals("")) {
res.add(new Word(this.getValueAt(i, column), Integer.parseInt(this.getValueAt(i, countColumn))));
} else {
res.add(new Word(this.getValueAt(i, column), 0));
}
}
}

Expand Down Expand Up @@ -435,7 +476,7 @@ public void save(File destinationFile) {
}

str.append("\n");
// str = str + String.join(" ", line) + "\n";
// str = str + String.join(" ", line) + "\n";
}

System.out.println("Done!");
Expand Down
9 changes: 7 additions & 2 deletions src/main/java/stats/HangmanStats.java
Original file line number Diff line number Diff line change
Expand Up @@ -213,9 +213,14 @@ public static void mergeWithDictionary(TabFile dictionary, Language lang) {
MongoCollection<Document> coll = MongoSetup.getWordsUsedCollection();
for (Document doc : coll.find(Filters.eq("lang", lang.getLanguageCode()))) {
String word = doc.get("word").toString();
if (dictionary.indexOf(word, 2).isEmpty()){
int count = doc.getInteger("count");

List<Integer>indexList = dictionary.indexOf(word, 2);
if (indexList.isEmpty()){
// Word not yet present in dictionary so add it
dictionary.addRow(new String[]{"fromOnlineDatabase", lang.getLanguageCode() + ":lemma", word, " "});
dictionary.addRow(new String[]{"fromOnlineDatabase", lang.getLanguageCode() + ":lemma", word, Integer.toString(count)});
}else{
dictionary.setValueAt(Integer.toString(count), indexList, 3);
}
}
log.getLogger().info("Merge finished");
Expand Down

0 comments on commit 63c3357

Please sign in to comment.