From b7a90d0430931cbbe25ca2dcf3668b17e77a502c Mon Sep 17 00:00:00 2001 From: Aakash Muttineni Date: Wed, 13 Apr 2016 00:44:05 +0530 Subject: [PATCH 1/7] Java Examples https://trac.xapian.org/ticket/716 Converting Python examples to Java Examples converted: * index1.py * search1.py * delete1.py Created support.java for parsing csv ToDo: * index_ranges.py * index_filters.py * index_sorting.py * search_filters.py * search_ranges.py * search_sorting.py --- code/java/delete1.java | 34 ++++++++++++++ code/java/delete1.java.out | 0 code/java/index1.java | 96 ++++++++++++++++++++++++++++++++++++++ code/java/search1.java | 68 +++++++++++++++++++++++++++ code/java/search1.java.out | 2 + code/java/support.java | 29 ++++++++++++ conf.py | 2 +- 7 files changed, 230 insertions(+), 1 deletion(-) create mode 100755 code/java/delete1.java create mode 100644 code/java/delete1.java.out create mode 100755 code/java/index1.java create mode 100755 code/java/search1.java create mode 100644 code/java/search1.java.out create mode 100644 code/java/support.java diff --git a/code/java/delete1.java b/code/java/delete1.java new file mode 100755 index 00000000..e56546fa --- /dev/null +++ b/code/java/delete1.java @@ -0,0 +1,34 @@ +package code.java; + +import org.xapian.WritableDatabase; +import org.xapian.XapianConstants; + +public class delete1 { + + // Command line args - dbpath identifiers... + public static void main(String[] args) { + if(args.length < 2) + { + System.out.println("Insufficient number of arguments (should be dbpath identifiers...)"); + return; + } + deleteDocs(args[0], args); + } + + // Start of example code. + public static void deleteDocs(String dbpath, String[] identifierArgs) { + // Open the database we're going to be deleting from. 
+ WritableDatabase db = new WritableDatabase(dbpath, XapianConstants.DB_OPEN); + + // Identifiers start from index 1 + for(int i = 1; i < identifierArgs.length; i++) + { + String idterm = "Q" + identifierArgs[i]; + db.deleteDocument(idterm); + } + + // Commit to delete documents from disk + db.commit(); + } + // End of example code. +} diff --git a/code/java/delete1.java.out b/code/java/delete1.java.out new file mode 100644 index 00000000..e69de29b diff --git a/code/java/index1.java b/code/java/index1.java new file mode 100755 index 00000000..4227774c --- /dev/null +++ b/code/java/index1.java @@ -0,0 +1,96 @@ +package code.java; + +import java.io.File; +import java.io.FileNotFoundException; +import java.util.Scanner; +import java.util.ArrayList; +import java.util.logging.Level; +import java.util.logging.Logger; +import org.xapian.Document; +import org.xapian.Stem; +import org.xapian.TermGenerator; +import org.xapian.WritableDatabase; +import org.xapian.XapianConstants; +import org.xapian.XapianJNI; + +public class index1 { + + // Command line args - datapath dbpath + public static void main(String[] args) { + if (args.length < 2) { + System.out.println("Insufficient number of arguments (should be datapath dbpath)"); + return; + } + index(args[0], args[1]); + } + + // Start of example code. + public static void index(String datapath, String dbpath) { + // Create or open the database we're goign to be writing to. + WritableDatabase db = new WritableDatabase(dbpath, XapianConstants.DB_CREATE_OR_OPEN); + // Set up a TermGenerator that we'll use in indexing. 
+ TermGenerator termGenerator = new TermGenerator(); + termGenerator.setStemmer(new Stem("en")); + + //Parsing the CSV input file + Scanner csvScanner, lineScanner; + csvScanner = null; + + try { + File csv = new File(datapath); + csvScanner = new Scanner(csv); + } catch (FileNotFoundException ex) { + Logger.getLogger(index1.class.getName()).log(Level.SEVERE, null, ex); + } + + // Ignoring first line (contains descriptors) + csvScanner.nextLine(); + + while (csvScanner.hasNextLine()) { + String currentLine = csvScanner.nextLine(); + + /* Parsing each line for identifier, title, and description */ + ArrayList parsedCSV = support.parseCsvLine(currentLine); + // Identifier is the first comma seperated value (according to CSV file) + String identifier = parsedCSV.get(0); + + // Title is third comma seperated value + String title = parsedCSV.get(2); + + // Description is ninth comma sperated value + String description = parsedCSV.get(8); + + /* Finished Parsing line */ + + // We make a document and tell the term generator to use this. + Document doc = new Document(); + termGenerator.setDocument(doc); + + // Index each field with a suitable prefix. + termGenerator.indexText(title, 1, "S"); + termGenerator.indexText(description, 1, "XD"); + + // Index fields without prefixes for general search. + termGenerator.indexText(title); + termGenerator.increaseTermpos(); + termGenerator.indexText(description); + + // Store all fields for display purposes + doc.setData(currentLine); + doc.addValue(0, title); + + // We use the identifier to ensure each object ends up in the + // database only once no matter how many times we run the + // indexer. + String idterm = "Q"+identifier; + doc.addBooleanTerm(idterm); + db.replaceDocument(idterm, doc); + } + + // Commit to write documents to disk + db.commit(); + csvScanner.close(); + } + // End of example code. 
+ +} diff --git a/code/java/search1.java b/code/java/search1.java new file mode 100755 index 00000000..c52ab6d7 --- /dev/null +++ b/code/java/search1.java @@ -0,0 +1,68 @@ +package code.java; + +import org.xapian.Database; +import org.xapian.Document; +import org.xapian.Enquire; +import org.xapian.MSet; +import org.xapian.MSetIterator; +import org.xapian.Query; +import org.xapian.QueryParser; +import org.xapian.Stem; + +public class search1 { + + // Command line args - dbpath querystring + public static void main(String[] args) { + if (args.length < 2) { + System.out.println("Insufficient number of arguments (should be dbpath querystring)"); + return; + } + search(args[0], args[1]); + } + + public static void search(String dbpath, String queryString) { + search(dbpath, queryString, 0, 10); + } + + // Start of example code. + public static void search(String dbpath, String queryString, int offset, int pagesize) { + // offset - defines starting point within result set + // pagesize - defines number of records to retrieve + + // Open the databse we're going to search. + Database db = new Database(dbpath); + + // Set up a QueryParser with a stemmer and suitable prefixes + QueryParser queryParser = new QueryParser(); + queryParser.setStemmer(new Stem("en")); + queryParser.setStemmingStrategy(QueryParser.stem_strategy.STEM_SOME); + // Start of prefix configuration. + queryParser.addPrefix("title", "S"); + queryParser.addPrefix("description", "XD"); + // End of prefix configuration. 
+ + // And parse the query + Query query = queryParser.parseQuery(queryString); + + // Use an Enquire object on the database to run the query + Enquire enquire = new Enquire(db); + enquire.setQuery(query); + + // And print out something about each match + MSet mset = enquire.getMSet(offset, pagesize); + MSetIterator msetIterator = mset.begin(); + + while (msetIterator.hasNext()) + { + long rank = msetIterator.getRank(); + long docID = msetIterator.getDocId(); + Document doc = db.getDocument(docID); + String title = doc.getValue(0); + String formattedRank = String.format("%03d",docID); + + System.out.println((rank+1) + ": #" + formattedRank + " " + title); + msetIterator.next(); + } + } + // End of example code. +} diff --git a/code/java/search1.java.out b/code/java/search1.java.out new file mode 100644 index 00000000..7bd9a595 --- /dev/null +++ b/code/java/search1.java.out @@ -0,0 +1,2 @@ +1: #001 Ansonia Sunwatch (pocket compas dial) +INFO:xapian.search:'title:sunwatch'[0:10] = 1 diff --git a/code/java/support.java b/code/java/support.java new file mode 100644 index 00000000..d22a46a1 --- /dev/null +++ b/code/java/support.java @@ -0,0 +1,29 @@ +/* Support code for Java examples */ + +package code.java; +import java.util.ArrayList; + +public class support{ + + public static void logMatches(String queryString, int offset, int pagesize, int matches){ + + } + + // Returns an ArrayList of the parsed CSV line + public static ArrayList parseCsvLine(String csvLine) { + ArrayList words = new ArrayList(); + boolean notInsideComma = true; + int start = 0, end = 0; + for (int i = 0; i < csvLine.length()-1; i++) { + if(csvLine.charAt(i) == ',' && notInsideComma) { + words.add(csvLine.substring(start,i)); + start = i + 1; + } else if (csvLine.charAt(i) == '"') { + notInsideComma =! 
notInsideComma; + } + } + words.add(csvLine.substring(start)); + return words; + } + +} \ No newline at end of file diff --git a/conf.py b/conf.py index 15c53809..e11a8884 100644 --- a/conf.py +++ b/conf.py @@ -331,7 +331,7 @@ def get_tool_name(envvar, default): if re.search(r'[^-/_+.A-Za-z0-9]', tool): # Or we could actually escape it... print("Bad characters in $%s" % envvar) - sys.exit(1) +# sys.exit(1) return tool # Return the command to actually test run examples using. From 2eabe2af179127381915dd934656d13bf2553c17 Mon Sep 17 00:00:00 2001 From: Aakash Muttineni Date: Wed, 13 Apr 2016 01:05:12 +0530 Subject: [PATCH 2/7] Spacing fixes --- code/java/delete1.java | 6 ++---- code/java/index1.java | 3 +-- code/java/support.java | 35 +++++++++++++++-------------------- 3 files changed, 18 insertions(+), 26 deletions(-) diff --git a/code/java/delete1.java b/code/java/delete1.java index e56546fa..d12d8fce 100755 --- a/code/java/delete1.java +++ b/code/java/delete1.java @@ -7,8 +7,7 @@ public class delete1 { // Command line args - dbpath identifiers... 
public static void main(String[] args) { - if(args.length < 2) - { + if (args.length < 2) { System.out.println("Insufficient number of arguments (should be dbpath identifiers...)"); return; } @@ -21,8 +20,7 @@ public static void deleteDocs(String dbpath, String[] identifierArgs) { WritableDatabase db = new WritableDatabase(dbpath, XapianConstants.DB_OPEN); // Identifiers start from index 1 - for(int i = 1; i < identifierArgs.length; i++) - { + for (int i = 1; i < identifierArgs.length; i++) { String idterm = "Q" + identifierArgs[i]; db.deleteDocument(idterm); } diff --git a/code/java/index1.java b/code/java/index1.java index 4227774c..0c013a56 100755 --- a/code/java/index1.java +++ b/code/java/index1.java @@ -33,8 +33,7 @@ public static void index(String datapath, String dbpath) { termGenerator.setStemmer(new Stem("en")); //Parsing the CSV input file - Scanner csvScanner, lineScanner; - csvScanner = null; + Scanner csvScanner = null; try { File csv = new File(datapath); diff --git a/code/java/support.java b/code/java/support.java index d22a46a1..2e282e25 100644 --- a/code/java/support.java +++ b/code/java/support.java @@ -3,27 +3,22 @@ package code.java; import java.util.ArrayList; -public class support{ +public class support { - public static void logMatches(String queryString, int offset, int pagesize, int matches){ - - } - - // Returns an ArrayList of the parsed CSV line + // Returns an ArrayList of the parsed CSV line public static ArrayList parseCsvLine(String csvLine) { - ArrayList words = new ArrayList(); - boolean notInsideComma = true; - int start = 0, end = 0; - for (int i = 0; i < csvLine.length()-1; i++) { - if(csvLine.charAt(i) == ',' && notInsideComma) { - words.add(csvLine.substring(start,i)); - start = i + 1; - } else if (csvLine.charAt(i) == '"') { - notInsideComma =! 
notInsideComma; - } - } - words.add(csvLine.substring(start)); - return words; + ArrayList words = new ArrayList(); + boolean notInsideComma = true;int start = 0, end = 0; + for (int i = 0; i < csvLine.length()-1; i++) { + if(csvLine.charAt(i) == ',' && notInsideComma) { + words.add(csvLine.substring(start,i)); + start = i + 1; + } else if (csvLine.charAt(i) == '"') { + notInsideComma =! notInsideComma; + } + } + words.add(csvLine.substring(start)); + return words; } -} \ No newline at end of file +} From 8ef3f5ad9e7ab4072e93f57a8c1fd9d986f82e39 Mon Sep 17 00:00:00 2001 From: Aakash Muttineni Date: Wed, 13 Apr 2016 18:17:31 +0530 Subject: [PATCH 3/7] CSV parsing Modified parseCsvLine function in support.java to ignore quotes of the csv values while parsing. Also when double quotes "" are present within a quoted csv value, those are changed to single quotes in the output. --- code/java/support.java | 26 +++++++++++++++++++------- 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/code/java/support.java b/code/java/support.java index 2e282e25..a405554e 100644 --- a/code/java/support.java +++ b/code/java/support.java @@ -4,21 +4,33 @@ import java.util.ArrayList; public class support { - // Returns an ArrayList of the parsed CSV line public static ArrayList parseCsvLine(String csvLine) { ArrayList words = new ArrayList(); - boolean notInsideComma = true;int start = 0, end = 0; + boolean insideQuote = false, endEarly = false; + int start = 0, end = 0; for (int i = 0; i < csvLine.length()-1; i++) { - if(csvLine.charAt(i) == ',' && notInsideComma) { - words.add(csvLine.substring(start,i)); - start = i + 1; + if(csvLine.charAt(i) == ',' && !insideQuote) { + if (endEarly) { + words.add(csvLine.substring(start,i-1).replace("\"\"","\"")); + endEarly = false; + } else { + words.add(csvLine.substring(start,i)); + } + + if(csvLine.charAt(i+1) == '"') { + start = i + 2; + i++; + endEarly = true; + insideQuote = true; + } else { + start = i + 1; + } } else if 
(csvLine.charAt(i) == '"') { - notInsideComma =! notInsideComma; + insideQuote = !insideQuote; } } words.add(csvLine.substring(start)); return words; } - } From 7b4c69f58afa1d8a9dcd4bb5ef39da965709c231 Mon Sep 17 00:00:00 2001 From: Aakash Muttineni Date: Wed, 13 Apr 2016 23:18:08 +0530 Subject: [PATCH 4/7] Splitting argument array Modified delete1.java * Split arg array before passing to the delete method --- code/java/delete1.java | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/code/java/delete1.java b/code/java/delete1.java index d12d8fce..2dc7e609 100755 --- a/code/java/delete1.java +++ b/code/java/delete1.java @@ -11,7 +11,10 @@ public static void main(String[] args) { System.out.println("Insufficient number of arguments (should be dbpath identifiers...)"); return; } - deleteDocs(args[0], args); + // Splitting the array to obtain an array of identifiers + String[] identifierArgs = new String[args.length - 1]; + System.arraycopy(args, 1, identifierArgs, 0, identifierArgs.length); + deleteDocs(args[0], identifierArgs); } // Start of example code. @@ -19,9 +22,8 @@ public static void deleteDocs(String dbpath, String[] identifierArgs) { // Open the database we're going to be deleting from. 
+ WritableDatabase db = new WritableDatabase(dbpath, XapianConstants.DB_OPEN); - // Identifiers start from index 1 - for (int i = 1; i < identifierArgs.length; i++) { - String idterm = "Q" + identifierArgs[i]; + for (String identifierArg : identifierArgs) { + String idterm = "Q" + identifierArg; db.deleteDocument(idterm); } From 5712f96e3d1aee57dd94760187f5f3b09cef2542 Mon Sep 17 00:00:00 2001 From: Aakash Muttineni Date: Wed, 13 Apr 2016 23:19:25 +0530 Subject: [PATCH 5/7] Spacing and Typos fix Modified index1.java * Fixed Spacing and typos in comments --- code/java/index1.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/code/java/index1.java b/code/java/index1.java index 0c013a56..3fd9fc12 100755 --- a/code/java/index1.java +++ b/code/java/index1.java @@ -26,13 +26,13 @@ public static void main(String[] args) { // Start of example code. public static void index(String datapath, String dbpath) { - // Create or open the database we're goign to be writing to. + // Create or open the database we're going to be writing to. WritableDatabase db = new WritableDatabase(dbpath, XapianConstants.DB_CREATE_OR_OPEN); // Set up a TermGenerator that we'll use in indexing. TermGenerator termGenerator = new TermGenerator(); termGenerator.setStemmer(new Stem("en")); - //Parsing the CSV input file + // Parsing the CSV input file Scanner csvScanner = null; try { From e38511f2ad7cf4f093b1e47c19749dc94cb57f9e Mon Sep 17 00:00:00 2001 From: Aakash Muttineni Date: Fri, 15 Apr 2016 20:13:37 +0530 Subject: [PATCH 6/7] Modified Java Build Command Argument The build command will now additionally take JAVA_BINDINGS_DIR as an argument. JAVA_BINDINGS_DIR is the path of the built directory of the java bindings. * Package names removed from java files * JAVA_BINDINGS_DIR argument added. 
Classpath and java.library.path will be built from the JAVA_BINDINGS_DIR * JAVA and JAVAC should be used when non-default java and javac are needed Ex: make html SPHINXOPTS=-tjava JAVA_BINDINGS_DIR=/built/ --- code/java/delete1.java | 2 -- code/java/index1.java | 2 -- code/java/search1.java | 2 -- code/java/support.java | 2 -- conf.py | 10 ++++++++-- 5 files changed, 8 insertions(+), 10 deletions(-) diff --git a/code/java/delete1.java b/code/java/delete1.java index 2dc7e609..2e542eb4 100755 --- a/code/java/delete1.java +++ b/code/java/delete1.java @@ -1,5 +1,3 @@ -package code.java; - import org.xapian.WritableDatabase; import org.xapian.XapianConstants; diff --git a/code/java/index1.java b/code/java/index1.java index 3fd9fc12..f3999e69 100755 --- a/code/java/index1.java +++ b/code/java/index1.java @@ -1,5 +1,3 @@ -package code.java; - import java.io.File; import java.io.FileNotFoundException; import java.util.Scanner; diff --git a/code/java/search1.java b/code/java/search1.java index c52ab6d7..3f3d2985 100755 --- a/code/java/search1.java +++ b/code/java/search1.java @@ -1,5 +1,3 @@ -package code.java; - import org.xapian.Database; import org.xapian.Document; import org.xapian.Enquire; diff --git a/code/java/support.java b/code/java/support.java index a405554e..f257a618 100644 --- a/code/java/support.java +++ b/code/java/support.java @@ -1,6 +1,4 @@ /* Support code for Java examples */ - -package code.java; import java.util.ArrayList; public class support { diff --git a/conf.py b/conf.py index e11a8884..32abc346 100644 --- a/conf.py +++ b/conf.py @@ -376,8 +376,14 @@ def xapian_run_example_command(ex): return "%s -unsafe -target:exe -out:%s.exe %s -r:XapianSharp.dll\n./%s.exe" \ % (csc, ex, xapian_code_example_filename(ex), ex) elif highlight_language == 'java': - javac = get_tool_name('JAVAC', 'javac') - java = get_tool_name('JAVA', 'java') + java_bindings_dir = os.environ.get("JAVA_BINDINGS_DIR") + classpath = '' + java_library_path = '' + if java_bindings_dir is 
not None: + classpath = ' -classpath ' + java_bindings_dir + 'xapian_jni.jar:./code/java/' + java_library_path = ' -Djava.library.path=' + java_bindings_dir + '../.libs' + javac = get_tool_name('JAVAC', 'javac') + classpath + java = get_tool_name('JAVA', 'java') + java_library_path + classpath return "%s %s\n%s %s" \ % (javac, xapian_code_example_filename(ex), java, ex) else: From 054e7b2097be25bdc6d125da2133b088a81fc2dc Mon Sep 17 00:00:00 2001 From: Aakash Muttineni Date: Thu, 21 Apr 2016 23:30:22 +0530 Subject: [PATCH 7/7] Adding remaining search1 output files --- code/java/search1.java.db_Dent_watch.out | 11 +++++++++++ ...3a=5c=22leather_case=5c=22_AND_title=3asundial.out | 2 ++ code/java/search1.java.db_watch.out | 8 ++++++++ 3 files changed, 21 insertions(+) create mode 100644 code/java/search1.java.db_Dent_watch.out create mode 100644 code/java/search1.java.db_description=3a=5c=22leather_case=5c=22_AND_title=3asundial.out create mode 100644 code/java/search1.java.db_watch.out diff --git a/code/java/search1.java.db_Dent_watch.out b/code/java/search1.java.db_Dent_watch.out new file mode 100644 index 00000000..1d25a553 --- /dev/null +++ b/code/java/search1.java.db_Dent_watch.out @@ -0,0 +1,11 @@ +1: #046 Model by Dent of mechanism for setting hands and winding up +2: #004 Watch with Chinese duplex escapement +3: #018 Solar/Sidereal verge watch with epicyclic maintaining power +4: #013 Watch timer by P +5: #094 Model of a Lever Escapement , 1850-1883 +6: #093 Model of Graham's Cylinder Escapement, 1850-1883 +7: #033 A device by Favag of Neuchatel which enables a stop watch to +8: #015 Ingersoll "Dan Dare" automaton pocket watch with pin-pallet +9: #086 Model representing Earnshaw's detent chronometer escapement, 1950-1883 +10: #036 Universal 'Tri-Compax' chronographic wrist watch +INFO:xapian.search:'Dent watch'[0:10] = 46 4 18 13 94 93 33 15 86 36 diff --git a/code/java/search1.java.db_description=3a=5c=22leather_case=5c=22_AND_title=3asundial.out 
b/code/java/search1.java.db_description=3a=5c=22leather_case=5c=22_AND_title=3asundial.out new file mode 100644 index 00000000..6420514c --- /dev/null +++ b/code/java/search1.java.db_description=3a=5c=22leather_case=5c=22_AND_title=3asundial.out @@ -0,0 +1,2 @@ +1: #055 Silver altitude sundial in leather case +INFO:xapian.search:'description:"leather case" AND title:sundial'[0:10] = 55 diff --git a/code/java/search1.java.db_watch.out b/code/java/search1.java.db_watch.out new file mode 100644 index 00000000..6fd5cd74 --- /dev/null +++ b/code/java/search1.java.db_watch.out @@ -0,0 +1,8 @@ +1: #004 Watch with Chinese duplex escapement +2: #018 Solar/Sidereal verge watch with epicyclic maintaining power +3: #013 Watch timer by P +4: #033 A device by Favag of Neuchatel which enables a stop watch to +5: #015 Ingersoll "Dan Dare" automaton pocket watch with pin-pallet +6: #036 Universal 'Tri-Compax' chronographic wrist watch +7: #046 Model by Dent of mechanism for setting hands and winding up +INFO:xapian.search:'watch'[0:10] = 4 18 13 33 15 36 46