Skip to content

Commit

Permalink
Let console use user dictionary
Browse files (browse the repository at this point in the history)
  • Loading branch information
wareya committed May 28, 2017
1 parent cdb06cc commit a854cda
Show file tree
Hide file tree
Showing 3 changed files with 46 additions and 32 deletions.
28 changes: 21 additions & 7 deletions src/ConsoleMain.java
Expand Up @@ -7,6 +7,7 @@
*/

public class ConsoleMain extends Main {
private static String last_message = "";
public static void main(String[] args)
{
try
Expand All @@ -25,17 +26,17 @@ public static void main(String[] args)

if (arguments.peekFirst().matches("(-h)|(--help)"))
{
println(out, "Usage: java -jar analyzer.jar <corpus.txt> (-[dswlpn] )*");
println(out, "Usage: java -jar analyzer.jar <corpus.txt> (-[fdswlpn] )*");
println(out, "\tcorpus.txt: must be in utf-8. cannot be named \"-h\" or \"--help\".");
println(out, "\t-f: disable user filters (userfilters.csv)");
println(out, "\t-d: disable user dictionary (userdict.csv)");
println(out, "\t-s: strip 〈〉 (but not their contents) and enable 《》 furigana culling (incl. contents) (operates at the code unit level, before parsing)");
println(out, "\t-w: disable 'only in dictionary' filter");
println(out, "\t-p: disable punctuation filter");
println(out, "\t-n: enable special blacklist (names and jargon from certain VNs)");
println(out, "\t-c: count lines and export index of the first line a term shows up in");
println(out, "Options must be stated separately (-p -d), not bundled (-pd)");
println(out, "");
println(out, "Output goes to console. Use > to redirect if you need to output to a file.");
println(out, "Output goes to standard output. Use > to redirect if you need to output to a file.");
}
else
{
Expand All @@ -46,15 +47,28 @@ public static void main(String[] args)
String argument = arguments.removeFirst();
if(argument.equals("-p")) filter_punctuation_enabled = false;
if(argument.equals("-w")) filter_dictionary_enabled = false;
if(argument.equals("-d")) enable_userfilter = false;
if(argument.equals("-n")) special_blacklist_enabled = true;
if(argument.equals("-f")) enable_userfilter = false;
if(argument.equals("-d")) enable_userdictionary = false;
if(argument.equals("-s")) skip_furigana_formatting = true;
if(argument.equals("-c")) enable_linecounter = true;
}

try
{
run(filename, out, (a,e)->{});
run(filename, out, (text,length)->
{
String mini_text = text.split(":")[0];
if(mini_text.equals(last_message))
System.err.printf("\r");
else
System.err.printf("\n");
if(length == 0.0)
System.err.printf(text);
else if(length > 0.0)
System.err.printf(text + " %.02f%%", length*100);
else if(!text.equals("Done"))
System.err.printf(text + " ...");
last_message = mini_text;
});
}
catch(IOException e)
{ /**/ }
Expand Down
21 changes: 1 addition & 20 deletions src/GUIMain.java
Expand Up @@ -92,26 +92,7 @@ public static void main(String[] args)
skip_furigana_formatting = option_strip_furigana.isSelected();
enable_linecounter = option_enable_linecount.isSelected();

if(option_enable_userdict.isSelected())
{
try
{
userdict = new FileInputStream("userdict.csv");
}
catch (IOException e)
{
userdict = null;
progress.setString("Failed to load user dictionary");
progress.setIndeterminate(false);
progress.setValue(0);
return;
}
}
else
{
//System.out.println("Not using user dictionary");
}

enable_userdictionary = option_enable_userdict.isSelected();
enable_userfilter = option_enable_userfilter.isSelected();

if(worker != null && worker.isAlive()) return;
Expand Down
29 changes: 24 additions & 5 deletions src/Main.java
Expand Up @@ -51,10 +51,10 @@ public static void main(String[] args)

static boolean filter_dictionary_enabled = true;
static boolean filter_punctuation_enabled = true;
static boolean special_blacklist_enabled = false;
static boolean skip_furigana_formatting = false;
static boolean enable_linecounter = false;
static boolean enable_userfilter = true;
static boolean enable_userdictionary = true;

// to force UTF-8 output on Windows
static BufferedWriter out;
Expand Down Expand Up @@ -174,16 +174,35 @@ private static String readline(InputStreamReader f, boolean noformat)
return null;
}

static InputStream userdict = null;
private static InputStream userdict = null;
static void run(String in_name, BufferedWriter out, BiConsumer<String, Double> update) throws IOException
{
update.accept("Loading user filter", 0.0);
if(enable_userdictionary)
{
try
{
userdict = new FileInputStream("userdict.csv");
}
catch (IOException e)
{
userdict = null;
update.accept("Failed to load user dictionary", -1.0);
return;
}
}

if(enable_userfilter)
{
update.accept("Loading user filter", -1.0);
try
{
init_filter();
}
catch (UnsupportedEncodingException e)
{
update.accept("Failed to open userfilters.csv as UTF-8.", 0.0);
return;
}
catch (IOException e)
{
update.accept("File access error occurred when initializing user filters.", 0.0);
Expand Down Expand Up @@ -220,12 +239,12 @@ static void run(String in_name, BufferedWriter out, BiConsumer<String, Double> u
Tokenizer tokenizer;
if(userdict != null)
{
update.accept("Initializing kuromoji with user dictionary.", 0.0);
update.accept("Initializing kuromoji with user dictionary", -1.0);
tokenizer = new Tokenizer.Builder().userDictionary(userdict).build();
}
else
{
update.accept("Initializing kuromoji without user dictionary.", 0.0);
update.accept("Initializing kuromoji without user dictionary", -10.0);
tokenizer = new Tokenizer.Builder().build();
}

Expand Down

0 comments on commit a854cda

Please sign in to comment.