Skip to content

Commit

Permalink
bug fix
Browse files Browse the repository at this point in the history
  • Loading branch information
xiaoling committed May 15, 2014
1 parent cec0d29 commit 74c55fa
Show file tree
Hide file tree
Showing 7 changed files with 37 additions and 26 deletions.
2 changes: 1 addition & 1 deletion LICENSE
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
MultiR License Agreement
FIGER License Agreement
|
All of the documentation and software included in the FIGER Software is
copyrighted by Xiao Ling and Daniel S. Weld.
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -29,5 +29,5 @@ that `testFile` MUST take a file with .txt extension. Then run:

The training data `train.tar.gz` is serialized in [Protocol Buffer](http://code.google.com/p/protobuf/). Please see `entity.proto` in the code package for the definitions.

Download (link)[https://www.dropbox.com/sh/fg9geomqxhh54qw/AAC6LWI4gsnCXuPeQWV5b5yNa/train.tar.gz]
Download [link](https://www.dropbox.com/sh/fg9geomqxhh54qw/AAC6LWI4gsnCXuPeQWV5b5yNa/train.tar.gz)

4 changes: 2 additions & 2 deletions aaai/exp.conf
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ inputSegments=aaai/exp.segment

# use an existing model
useModel=true
modelFile=config/figer.model
modelFile=figer.model.gz

# Test parameters
#*****test file has to have .txt extension*******
Expand All @@ -23,7 +23,7 @@ eval=true
labelFile=aaai/exp.label

# Train parameters
trainFile=train.data
trainFile=train.data.gz
MAX_ITER_NUM=15
FEATURE_FREQ_THRESHOLD=1
testWhenLearn=false
Expand Down
4 changes: 2 additions & 2 deletions config/figer.conf
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ inputSegments=

# use an existing model
useModel=true
modelFile=config/figer.model
modelFile=figer.model.gz

# Test parameters
#*****test file has to have .txt extension*******
Expand All @@ -21,7 +21,7 @@ eval=false
labelFile=

# Train parameters
trainFile=train.data
trainFile=train.data.gz
sampleTrain=false
MAX_ITER_NUM=15
FEATURE_FREQ_THRESHOLD=1
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
import java.util.Hashtable;
import java.util.Scanner;

import edu.washington.cs.figer.util.FileUtil;
import edu.washington.cs.figer.util.X;

public class MapType {
Expand Down
43 changes: 25 additions & 18 deletions src/main/java/edu/washington/cs/figer/data/MentionReader.java
Original file line number Diff line number Diff line change
@@ -1,31 +1,35 @@
package edu.washington.cs.figer.data;

import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.util.zip.GZIPInputStream;

import edu.washington.cs.figer.data.EntityProtos.Mention;

public class MentionReader {
/**
 * Command-line entry point. Currently a no-op: the body consists solely of
 * commented-out code. That disabled snippet iterated over several .pbf
 * files, read each Mention through getMentionReader/readMention, cleared a
 * number of fields on the mention's builder, and wrote the result through
 * a MentionWriter to "train.data.noparse".
 *
 * @param args command-line arguments; not read by the method
 */
public static void main(String[] args) {
/*String[] files = new String[]{"/projects/pardosa/data14/xiaoling/data/wex.train.pbf",
"/projects/pardosa/data14/xiaoling/data/wex.dev.pbf",
"/projects/pardosa/data14/xiaoling/data/wex.test.pbf"};
MentionWriter writer = MentionWriter.getMentionWriter("train.data.noparse");
for (String file: files) {
Debug.pl(file);
MentionReader reader = getMentionReader(file);
Mention mention = null;
while ((mention = reader.readMention())!=null) {
writer.writeObject(mention.toBuilder().clearFeatures()
.clearDeps().clearFileid().clearPosTags().clearSentid().clearEntityName().build());
}
}
writer.close();*/
/*
* String[] files = new
* String[]{"/projects/pardosa/data14/xiaoling/data/wex.train.pbf",
* "/projects/pardosa/data14/xiaoling/data/wex.dev.pbf",
* "/projects/pardosa/data14/xiaoling/data/wex.test.pbf"};
* MentionWriter writer =
* MentionWriter.getMentionWriter("train.data.noparse");
* for (String file: files) {
* Debug.pl(file);
* MentionReader reader = getMentionReader(file);
* Mention mention = null;
* while ((mention = reader.readMention())!=null) {
* writer.writeObject(mention.toBuilder().clearFeatures()
* .clearDeps().clearFileid().clearPosTags().clearSentid().clearEntityName
* ().build());
* }
* }
* writer.close();
*/
}



public String inputFile = null;
public InputStream inputStream = null;
public Mention current = null;
Expand All @@ -43,7 +47,10 @@ public static MentionReader getMentionReader(String file) {
reader = new MentionReader();
reader.inputFile = file;
reader.inputStream = new FileInputStream(file);
} catch (FileNotFoundException e) {
if (file.endsWith(".gz")) {
reader.inputStream = new GZIPInputStream(reader.inputStream);
}
} catch (IOException e) {
e.printStackTrace();
}
return reader;
Expand Down
7 changes: 6 additions & 1 deletion src/main/java/edu/washington/cs/figer/util/Serializer.java
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,10 @@
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.util.zip.GZIPInputStream;

public class Serializer {
public static boolean serialize(Object obj, String filename) {
Expand All @@ -29,11 +31,14 @@ public static Object deserialize(String filename) {
if (filename == null) {
return null;
}
FileInputStream fis = null;
InputStream fis = null;
ObjectInputStream in = null;
Object obj = null;
try {
fis = new FileInputStream(filename);
if (filename.endsWith(".gz")) {
fis = new GZIPInputStream(fis);
}
in = new ObjectInputStream(fis);
obj = in.readObject();
in.close();
Expand Down

0 comments on commit 74c55fa

Please sign in to comment.