New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Java Examples #12
Java Examples #12
Changes from all commits
b7a90d0
2eabe2a
8ef3f5a
7b4c69f
5712f96
e38511f
054e7b2
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
import org.xapian.WritableDatabase; | ||
import org.xapian.XapianConstants; | ||
|
||
public class delete1 { | ||
|
||
// Command line args - dbpath identifiers... | ||
public static void main(String[] args) { | ||
if (args.length < 2) { | ||
System.out.println("Insufficient number of arguments (should be dbpath identifiers...)"); | ||
return; | ||
} | ||
// Splitting the array to obtain an array of identifiers | ||
String[] identifierArgs = new String[args.length - 1]; | ||
System.arraycopy(args, 1, identifierArgs, 0, identifierArgs.length); | ||
deleteDocs(args[0], identifierArgs); | ||
} | ||
|
||
// Start of example code. | ||
public static void deleteDocs(String dbpath, String[] identifierArgs) { | ||
// Open the database we're going to be deleting from. | ||
WritableDatabase db = new WritableDatabase(dbpath, XapianConstants.DB_OPEN); | ||
|
||
for (String identifierArg : identifierArgs) { | ||
String idterm = "Q" + identifierArg; | ||
db.deleteDocument(idterm); | ||
} | ||
|
||
// Commit to delete documents from disk | ||
db.commit(); | ||
} | ||
// End of example code. | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,93 @@ | ||
import java.io.File; | ||
import java.io.FileNotFoundException; | ||
import java.util.Scanner; | ||
import java.util.ArrayList; | ||
import java.util.logging.Level; | ||
import java.util.logging.Logger; | ||
import org.xapian.Document; | ||
import org.xapian.Stem; | ||
import org.xapian.TermGenerator; | ||
import org.xapian.WritableDatabase; | ||
import org.xapian.XapianConstants; | ||
import org.xapian.XapianJNI; | ||
|
||
public class index1 { | ||
|
||
// Command line args - datapath dbpath | ||
public static void main(String[] args) { | ||
if (args.length < 2) { | ||
System.out.println("Insufficient number of arguments (should be datapath dbpath)"); | ||
return; | ||
} | ||
index(args[0], args[1]); | ||
} | ||
|
||
// Start of example code. | ||
public static void index(String datapath, String dbpath) { | ||
// Create or open the database we're going to be writing to. | ||
WritableDatabase db = new WritableDatabase(dbpath, XapianConstants.DB_CREATE_OR_OPEN); | ||
// Set up a TermGenerator that we'll use in indexing. | ||
TermGenerator termGenerator = new TermGenerator(); | ||
termGenerator.setStemmer(new Stem("en")); | ||
|
||
// Parsing the CSV input file | ||
Scanner csvScanner = null; | ||
|
||
try { | ||
File csv = new File(datapath); | ||
csvScanner = new Scanner(csv); | ||
} catch (FileNotFoundException ex) { | ||
Logger.getLogger(index1.class.getName()).log(Level.SEVERE, null, ex); | ||
} | ||
|
||
// Ignoring first line (contains descriptors) | ||
csvScanner.nextLine(); | ||
|
||
while (csvScanner.hasNextLine()) { | ||
String currentLine = csvScanner.nextLine(); | ||
|
||
/* Parsing each line for identifier, title, and description */ | ||
ArrayList<String> parsedCSV = support.parseCsvLine(currentLine); | ||
// Identifier is the first comma seperated value (according to CSV file) | ||
String identifier = parsedCSV.get(0); | ||
|
||
// Title is third comma seperated value | ||
String title = parsedCSV.get(2); | ||
|
||
// Description is ninth comma sperated value | ||
String description = parsedCSV.get(8); | ||
|
||
/* Finished Parsing line */ | ||
|
||
// We make a document and tell the term generator to use this. | ||
Document doc = new Document(); | ||
termGenerator.setDocument(doc); | ||
|
||
// Index each field with a suitable prefix. | ||
termGenerator.indexText(title, 1, "S"); | ||
termGenerator.indexText(description, 1, "XD"); | ||
|
||
// Index fields without prefixes for general search. | ||
termGenerator.indexText(title); | ||
termGenerator.increaseTermpos(); | ||
termGenerator.indexText(description); | ||
|
||
// Store all fields for display purposes | ||
doc.setData(currentLine); | ||
doc.addValue(0, title); | ||
|
||
// We use the identifier to ensure each object ends up in the | ||
// database only once no matter how many times we run the | ||
// indexer. | ||
String idterm = "Q"+identifier; | ||
doc.addBooleanTerm(idterm); | ||
db.replaceDocument(idterm, doc); | ||
} | ||
|
||
// Commit to write documents to disk | ||
db.commit(); | ||
csvScanner.close(); | ||
} | ||
// End of example code. | ||
|
||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,66 @@ | ||
import org.xapian.Database; | ||
import org.xapian.Document; | ||
import org.xapian.Enquire; | ||
import org.xapian.MSet; | ||
import org.xapian.MSetIterator; | ||
import org.xapian.Query; | ||
import org.xapian.QueryParser; | ||
import org.xapian.Stem; | ||
|
||
public class search1 { | ||
|
||
// Command line args - dbpath querystring | ||
public static void main(String[] args) { | ||
if (args.length < 2) { | ||
System.out.println("Insufficient number of arguments (should be dbpath querystring)"); | ||
return; | ||
} | ||
search(args[0], args[1]); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You need to join all subsequent arguments together to make this compatible with the other programs. For instance, we run a multi-word search in |
||
} | ||
|
||
public static void search(String dbpath, String queryString) { | ||
search(dbpath, queryString, 0, 10); | ||
} | ||
|
||
// Start of example code. | ||
public static void search(String dbpath, String queryString, int offset, int pagesize) { | ||
// offset - defines starting point within result set | ||
// pagesize - defines number of records to retrieve | ||
|
||
// Open the databse we're going to search. | ||
Database db = new Database(dbpath); | ||
|
||
// Set up a QueryParser with a stemmer and suitable prefixes | ||
QueryParser queryParser = new QueryParser(); | ||
queryParser.setStemmer(new Stem("en")); | ||
queryParser.setStemmingStrategy(QueryParser.stem_strategy.STEM_SOME); | ||
// Start of prefix configuration. | ||
queryParser.addPrefix("title", "S"); | ||
queryParser.addPrefix("description", "XD"); | ||
// End of prefix configuration. | ||
|
||
// And parse the query | ||
Query query = queryParser.parseQuery(queryString); | ||
|
||
// Use an Enquire object on the database to run the query | ||
Enquire enquire = new Enquire(db); | ||
enquire.setQuery(query); | ||
|
||
// And print out something about each match | ||
MSet mset = enquire.getMSet(offset, pagesize); | ||
MSetIterator msetIterator = mset.begin(); | ||
|
||
while (msetIterator.hasNext()) | ||
{ | ||
long rank = msetIterator.getRank(); | ||
long docID = msetIterator.getDocId(); | ||
Document doc = db.getDocument(docID); | ||
String title = doc.getValue(0); | ||
String formattedRank = String.format("%03d",docID); | ||
|
||
System.out.println((rank+1) + ": #" + formattedRank + " " + title); | ||
msetIterator.next(); | ||
} | ||
} | ||
// End of example code. | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
1: #046 Model by Dent of mechanism for setting hands and winding up | ||
2: #004 Watch with Chinese duplex escapement | ||
3: #018 Solar/Sidereal verge watch with epicyclic maintaining power | ||
4: #013 Watch timer by P | ||
5: #094 Model of a Lever Escapement , 1850-1883 | ||
6: #093 Model of Graham's Cylinder Escapement, 1850-1883 | ||
7: #033 A device by Favag of Neuchatel which enables a stop watch to | ||
8: #015 Ingersoll "Dan Dare" automaton pocket watch with pin-pallet | ||
9: #086 Model representing Earnshaw's detent chronometer escapement, 1950-1883 | ||
10: #036 Universal 'Tri-Compax' chronographic wrist watch | ||
INFO:xapian.search:'Dent watch'[0:10] = 46 4 18 13 94 93 33 15 86 36 |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
1: #055 Silver altitude sundial in leather case | ||
INFO:xapian.search:'description:"leather case" AND title:sundial'[0:10] = 55 |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
1: #004 Watch with Chinese duplex escapement | ||
2: #018 Solar/Sidereal verge watch with epicyclic maintaining power | ||
3: #013 Watch timer by P | ||
4: #033 A device by Favag of Neuchatel which enables a stop watch to | ||
5: #015 Ingersoll "Dan Dare" automaton pocket watch with pin-pallet | ||
6: #036 Universal 'Tri-Compax' chronographic wrist watch | ||
7: #046 Model by Dent of mechanism for setting hands and winding up | ||
INFO:xapian.search:'watch'[0:10] = 4 18 13 33 15 36 46 |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
1: #001 Ansonia Sunwatch (pocket compas dial) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I assume that this would fail at the moment because the docid is formatted as |
||
INFO:xapian.search:'title:sunwatch'[0:10] = 1 |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,34 @@ | ||
/* Support code for Java examples */ | ||
import java.util.ArrayList; | ||
|
||
public class support {

    /**
     * Splits one line of CSV text into its fields.
     *
     * <p>Fields are separated by commas. A field that starts with a double
     * quote is a quoted field: it runs to the matching closing quote, may
     * contain commas, and a doubled quote ({@code ""}) inside it stands for
     * one literal quote character. The surrounding quotes are not part of
     * the returned value. A quote that appears after the start of an
     * unquoted field is kept as an ordinary character.
     *
     * <p>Only a single line is handled; quoted fields spanning several lines
     * are not supported.
     *
     * @param csvLine the line to split, without its line terminator
     * @return the fields in order; always at least one element, and an empty
     *         field (including one after a trailing comma) is returned as ""
     */
    public static ArrayList<String> parseCsvLine(String csvLine) {
        ArrayList<String> fields = new ArrayList<String>();
        StringBuilder field = new StringBuilder();
        boolean insideQuote = false;
        // True until the first character of the current field is consumed;
        // only there does a quote open a quoted field.
        boolean atFieldStart = true;

        for (int i = 0; i < csvLine.length(); i++) {
            char c = csvLine.charAt(i);
            if (insideQuote) {
                if (c != '"') {
                    field.append(c);
                } else if (i + 1 < csvLine.length() && csvLine.charAt(i + 1) == '"') {
                    // Doubled quote inside a quoted field: one literal quote.
                    field.append('"');
                    i++;
                } else {
                    // Closing quote of the field.
                    insideQuote = false;
                }
            } else if (c == ',') {
                fields.add(field.toString());
                field.setLength(0);
                atFieldStart = true;
                continue;
            } else if (c == '"' && atFieldStart) {
                insideQuote = true;
            } else {
                field.append(c);
            }
            atFieldStart = false;
        }

        // The last field has no terminating comma.
        fields.add(field.toString());
        return fields;
    }
}
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -331,7 +331,7 @@ def get_tool_name(envvar, default): | |
if re.search(r'[^-/_+.A-Za-z0-9]', tool): | ||
# Or we could actually escape it... | ||
print("Bad characters in $%s" % envvar) | ||
sys.exit(1) | ||
# sys.exit(1) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The current regexp is definitely overly conservative - what characters are causing you problems here? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It could be the hyphens when we are setting There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
If you were passing options, the "bad character" was probably a space - the current check assumes there aren't options included with the tool (which is probably flawed). There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is there still a need for options with java and javac? If there isn't then we can un-comment the exit. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think your |
||
return tool | ||
|
||
# Return the command to actually test run examples using. | ||
|
@@ -376,8 +376,14 @@ def xapian_run_example_command(ex): | |
return "%s -unsafe -target:exe -out:%s.exe %s -r:XapianSharp.dll\n./%s.exe" \ | ||
% (csc, ex, xapian_code_example_filename(ex), ex) | ||
elif highlight_language == 'java': | ||
javac = get_tool_name('JAVAC', 'javac') | ||
java = get_tool_name('JAVA', 'java') | ||
java_bindings_dir = os.environ.get("JAVA_BINDINGS_DIR") | ||
classpath = '' | ||
java_library_path = '' | ||
if java_bindings_dir is not None: | ||
classpath = ' -classpath ' + java_bindings_dir + 'xapian_jni.jar:./code/java/' | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We should construct a full path using There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm not sure how to do this. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It's important to understand what a classpath string actually does. The colon ( There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm not sure that You can ask Java what it is, but I'm not sure how to without compiling and running a small program, such as:
That's what we do in There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. From a quick look, I believe that Java just uses whatever the underlying platform uses for path separators. I certainly think that until someone on Windows has a problem we should do the 'normal' thing in Python. |
||
java_library_path = ' -Djava.library.path=' + java_bindings_dir + '../.libs' | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We should do this from an installed copy of the Java bindings, so the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Where is the installed copy located? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Wherever. Java doesn't have a standard place across all platforms, which is why I'm suggesting a required environment variable to say where they are. |
||
javac = get_tool_name('JAVAC', 'javac') + classpath | ||
java = get_tool_name('JAVA', 'java') + java_library_path + classpath | ||
return "%s %s\n%s %s" \ | ||
% (javac, xapian_code_example_filename(ex), java, ex) | ||
else: | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You shouldn't use value slots for this, partly because that's not what they're designed for, but also because later code in the guide uses slot 0 for something else.