Permalink
Browse files

* removed unused files

  • Loading branch information...
1 parent 5df3672 commit 75cbd151e2a46620330854154b40764c1d661086 @whym committed Feb 27, 2012
View
35 lcp.py
@@ -1,35 +0,0 @@
-from linsuffarr import *
-
def longest_common_substring(text, freq, length):
    """Return the longest substring of ``text`` that is shared by a group
    of at least ``freq`` suffixes whose common prefix is at least
    ``length`` characters long ('' if no such group exists).

    Works by scanning the suffix array together with its LCP table: a
    maximal run of adjacent suffix-array entries whose pairwise LCP stays
    >= ``length`` corresponds to a group of positions sharing a prefix.
    """
    sa = SuffixArray(text, unit=UNIT_CHARACTER)
    # NOTE(review): assumed lcp[i] is the longest common prefix length of
    # the suffixes at sa[i-1] and sa[i] -- confirm against linsuffarr.
    lcp = sa._LCP_values
    sa = sa.SA
    ret = ''
    i = 0
    while i < len(sa):
        # f counts the adjacent suffixes in the current run that keep a
        # pairwise LCP of at least ``length``.
        f = 1
        while i + 1 < len(sa) and lcp[i+1] >= length:
            f += 1
            i += 1
        # NOTE(review): lcp[i] here is only the LCP of the run's *last*
        # adjacent pair, not the minimum over the whole run, so for runs
        # longer than two this may overstate the shared prefix -- verify.
        if f >= freq and lcp[i] > len(ret):
            ret = text[sa[i-1] : sa[i-1]+lcp[i]]
        # A singleton run did not advance i in the inner loop; step past it.
        # (A run of f > 1 is revisited once as a singleton on the next pass,
        # which is redundant but harmless.)
        if f == 1:
            i += 1
    return ret
-
def common_substring(a, freq, length):
    """Join the strings in ``a`` with distinct control-character delimiters
    and return the longest substring occurring in at least ``freq`` places
    with length >= ``length``.
    """
    # The separator after the i-th string is chr((i + 1) % 256): the same
    # cycle 1, 2, ..., 255, 0, 1, ... the original running counter produced.
    joined = ''.join(s + chr((i + 1) % 256) for i, s in enumerate(a))
    return longest_common_substring(joined, freq, length)
-
if __name__ == '__main__':
    # Smoke test: five Wikipedia "last warning" vandalism-warning templates.
    # Expected output is the longest boilerplate fragment shared by at
    # least 5 of them with a common prefix of >= 20 characters.
    a = ['[[File:Stop hand nuvola.svg|30px|alt=|link=]] \'\'\'This is your last warning\'\'\'. You will be blocked from editing the next time you vandalize a page, as you did with <span class="plainlinks">[{{{2}}} this edit]</span> to [[:{{{1}}}]]. <!-- Uw-vandalism4 --><!-- Template:Huggle/warn-4 --> ~~<noinclude></noinclude>~~<noinclude> [[pt:Predefinio:Huggle/warn-4]] </noinclude>',
         '<div style=clear: both></div>{{<includeonly>safesubst:</includeonly>Huggle/uw-4 |page=[[:{{{1}}}]] with [{{{2}}} this edit] |extra=~~<noinclude></noinclude>~~ |reason={{{reason|[[Wikipedia:Vandalism|vandalize]] Wikipedia}}} }}<!-- Template:uw-vandalism4 --><noinclude> {{Huggle/TemplateNotice|series = uw-vandalism|max = 4im|s1 = uw-v4|s2 = uw-vand4|s3 = uw-vandal4|nothankyou=yes}} </noinclude>',
         '<div style=clear: both></div>{{<includeonly>safesubst:</includeonly>Huggle/uw-4 |page=[[:{{{1}}}]] with <span class="plainlinks">[{{{2}}} this edit]</span> |extra=~~<noinclude></noinclude>~~ |reason={{{reason|[[Wikipedia:Vandalism|vandalize]] Wikipedia}}} }}<!-- Template:Huggle/warn-4 --><noinclude> {{Huggle/TemplateNotice|series = uw-vandalism|max = 4im|s1 = uw-v4|s2 = uw-vand4|s3 = uw-vandal4|nothankyou=yes}} </noinclude>',
         '[[File:Stop hand nuvola.svg|30px|alt=|link=]] \'\'\'This is your last warning\'\'\'. You will be blocked from editing the next time you vandalize a page, as you did with <span class="plainlinks">[{{{2}}} this edit]</span> to [[:{{{1}}}]]. <!-- Uw-vandalism4 --><!-- Template:Huggle/warn-4 --> ~~<noinclude></noinclude>~~<noinclude> [[pt:Predefinio:Huggle/warn-4]] </noinclude>',
         '[[File:Stop hand nuvola.svg|30px]] \'\'\'This is your last warning\'\'\'. You may be \'\'\'blocked from editing without further warning\'\'\' the next time you vandalize a page, as you did with <span class="plainlinks">[{{{2}}} this edit]</span> to [[:{{{1}}}]]. <!-- Template:uw-huggle4 --> ~~<noinclude></noinclude>~~<noinclude> </noinclude>',
         '[[File:Stop hand nuvola.svg|30px]] \'\'\'This is your last warning\'\'\'. You may be \'\'\'blocked from editing without further warning\'\'\' the next time you vandalize a page, as you did with <span class="plainlinks">[{{{2}}} this edit]</span> to [[:{{{1}}}]]. <!-- Template:Huggle/warn-4 --><!-- Template:uw-vandalism4 --> ~~<noinclude></noinclude>~~<noinclude>']
    print common_substring(a, 5, 20)
View
50 src/main/java/org/wikimedia/revdiffsearch/Pair.java
@@ -1,50 +0,0 @@
-package org.wikimedia.revdiffsearch;
-
/**
 * A mutable generic 2-tuple with value-based equality.
 *
 * <p>Null components are permitted and handled by {@link #equals(Object)}
 * and {@link #hashCode()} (the original implementation threw
 * NullPointerException for them).  Not thread-safe.
 */
public class Pair<X,Y> {
  protected X first;
  protected Y second;

  /** Creates a pair of the two given components. */
  public Pair(X x, Y y) {
    this.first = x;
    this.second = y;
  }

  /** Copy constructor: shallow-copies both components of {@code p}. */
  public Pair(Pair<X,Y> p) {
    this(p.first, p.second);
  }

  public X getFirst() { return this.first; }
  public Y getSecond() { return this.second; }
  public void setFirst(X c) { this.first = c; }
  public void setSecond(Y c) { this.second = c; }

  /**
   * Two pairs are equal when both components are equal (null-safe).
   * Uses a wildcard cast instead of the original raw {@code Pair} cast.
   */
  @Override
  public boolean equals(Object o) {
    if (this == o) return true;
    if (!(o instanceof Pair)) return false;
    Pair<?,?> p = (Pair<?,?>) o;
    return java.util.Objects.equals(this.first, p.first)
        && java.util.Objects.equals(this.second, p.second);
  }

  /** Consistent with {@link #equals(Object)}; keeps the original 107 multiplier, null-safe. */
  @Override public int hashCode() {
    return java.util.Objects.hashCode(this.first) * 107
        + java.util.Objects.hashCode(this.second);
  }

  /** Renders as {@code (first,second)}. */
  @Override
  public String toString() {
    return "(" + this.first + "," + this.second + ")";
  }

  /** Static factory; lets the compiler infer the type arguments. */
  public static <Z,W> Pair<Z,W> newInstance(Z z, W w) {
    return new Pair<Z,W>(z,w);
  }

  /** Static copy factory. */
  public static <Z,W> Pair<Z,W> newInstance(Pair<Z,W> p) {
    return new Pair<Z,W>(p);
  }
}
-
-/*
- * Local variables:
- * tab-width: 2
- * c-basic-offset: 2
- * indent-tabs-mode: nil
- * End:
- */
View
377 src/main/java/org/wikimedia/revdiffsearch/Searcher.java
@@ -1,377 +0,0 @@
-package org.wikimedia.revdiffsearch;
-
-/**
- * Copyright 2011 Apache Software Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.io.BufferedReader;
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.FileWriter;
-import java.io.IOException;
-import java.io.InputStreamReader;
-import java.text.DateFormat;
-import java.text.SimpleDateFormat;
-import java.util.Date;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Collection;
-import java.util.ArrayList;
-import org.apache.commons.lang3.StringEscapeUtils;
-import au.com.bytecode.opencsv.CSVWriter;
-
-import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.document.Document;
-import org.apache.lucene.document.Fieldable;
-import org.apache.lucene.queryParser.ParseException;
-import org.apache.lucene.queryParser.QueryParser;
-import org.apache.lucene.search.IndexSearcher;
-import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.search.Query;
-import org.apache.lucene.search.ScoreDoc;
-import org.apache.lucene.search.TopDocs;
-import org.apache.lucene.store.FSDirectory;
-import org.apache.lucene.util.Version;
-import org.wikimedia.revdiffsearch.utils.FileUtils;
-
-/** Simple command-line based search demo. */
/**
 * Simple command-line based search demo: reads Lucene queries from a file,
 * a {@code -query} argument or stdin, runs them against a revision-diff
 * index and pages through the hits interactively.
 */
public class Searcher {
  /** Field whose value identifies a hit in the printed result rows. */
  private static final String searchKey = "rev_id";
  /** Upper bound on the number of documents dumped by {@link #writeResults}. */
  private static int MAX_HITS = 10000;

  /** Not instantiable: all entry points are static. */
  private Searcher() {
  }

  /**
   * Truncates {@code str} so a printed cell stays within {@code max}
   * characters, appending "..." when something was cut off.
   * NOTE(review): because of the "+ 3" slack, strings of length
   * max-2..max are also truncated -- presumably to always fit the ellipsis.
   */
  private static String truncateString(String str, int max) {
    if (str.length() + 3 > max) {
      return str.substring(0, max - 3) + "...";
    } else {
      return str;
    }
  }

  /**
   * Entry point.  Parses the flags shown in {@code usage}, opens the index
   * and loops reading queries (one per line) until EOF or a blank line.
   */
  public static void main(String[] args) throws Exception {
    String usage = "Usage:\tjava "
        + Searcher.class.getName()
        + " [-index dir] [-field f] [-repeat n] [-queries file] [-query string] [-raw] [-paging hitsPerPage]\n\nSee http://lucene.apache.org/java/4_0/demo.html for details.";
    if (args.length > 0
        && ("-h".equals(args[0]) || "-help".equals(args[0]))) {
      System.out.println(usage);
      System.exit(0);
    }

    // Defaults: search the "added" field of an index in ./index.
    String index = "index";
    String field = "added";
    String queries = null;      // path of a query file, one query per line
    int repeat = 0;             // >0: re-run each query as a benchmark
    boolean raw = false;        // print docid/score only
    String queryString = null;  // single query given on the command line
    int hitsPerPage = 10;
    boolean exact = false;      // post-filter hits by literal substring match

    for (int i = 0; i < args.length; i++) {
      if ("-index".equals(args[i])) {
        index = args[i + 1];
        i++;
      } else if ("-field".equals(args[i])) {
        field = args[i + 1];
        i++;
      } else if ("-queries".equals(args[i])) {
        queries = args[i + 1];
        i++;
      } else if ("-query".equals(args[i])) {
        queryString = args[i + 1];
        i++;
      } else if ("-repeat".equals(args[i])) {
        repeat = Integer.parseInt(args[i + 1]);
        i++;
      } else if ("-raw".equals(args[i])) {
        raw = true;
      } else if ("-exact".equals(args[i])) {
        exact = true;
      } else if ("-paging".equals(args[i])) {
        hitsPerPage = Integer.parseInt(args[i + 1]);
        if (hitsPerPage <= 0) {
          System.err
              .println("There must be at least 1 hit per page.");
          System.exit(1);
        }
        i++;
      }
    }

    IndexSearcher searcher = new IndexSearcher(FSDirectory.open(new File(
        index)));
    System.err.println("Index contains " + searcher.maxDoc()
        + " documents.");
    Analyzer analyzer = RevDiffSearchUtils.getAnalyzer();

    // Query source: the -queries file if given, otherwise stdin.
    BufferedReader in = null;
    if (queries != null) {
      in = new BufferedReader(new InputStreamReader(new FileInputStream(
          queries), "UTF-8"));
    } else {
      in = new BufferedReader(new InputStreamReader(System.in, "UTF-8"));
    }
    QueryParser parser = new QueryParser(Version.LUCENE_35, field, analyzer);

    while (true) {
      if (queries == null && queryString == null) { // prompt the user
        System.out.println("Enter query: ");
      }

      String line = queryString != null ? queryString : in.readLine();

      // NOTE(review): length() can never be -1, so only the null (EOF)
      // half of this condition ever fires; looks like a leftover typo.
      if (line == null || line.length() == -1) {
        break;
      }

      line = line.trim();
      if (line.length() == 0) {
        break;
      }

      try {
        Query query = parser.parse(line);

        System.out.println("Searching for: " + query.toString(field));

        if (repeat > 0) { // repeat & time as benchmark
          Date start = new Date();
          for (int i = 0; i < repeat; i++) {
            searcher.search(query, null, 100);
          }
          Date end = new Date();
          System.out.println("Time: "
              + (end.getTime() - start.getTime()) + "ms");
        }

        // Interactive paging only when reading from an interactive stdin
        // (neither a query file nor a -query argument was given).
        doPagingSearch(in, searcher, query, hitsPerPage, raw, exact,
            queryString == null? line: queryString,
            queries == null && queryString == null);

      } catch (ParseException e) {
        System.out
            .println("Error parsing the query, please refine your query.");
      }

      // A single -query argument is executed exactly once.
      if (queryString != null) {
        break;
      }
    }
    searcher.close();
  }

  public static void setMAX_HITS(int max_hits) {
    MAX_HITS = max_hits;
  }

  public static int getMAX_HITS() {
    return MAX_HITS;
  }

  /** Prints the tab-separated heading row for the fields of the first hit. */
  private static void printResultHeading(List<Fieldable> fields) {
    if (fields != null) {
      System.out.print("#\tScore\t");
      Iterator<Fieldable> it = fields.iterator();
      while (it.hasNext()) {
        Fieldable value = it.next();
        System.out.print(value.name() + "\t");
      }
      System.out.println();
    }
  }

  /**
   * Dumps documents to a tab-separated CSV file named after the query,
   * with one header row of all field names known to the index.
   *
   * NOTE(review): the loop iterates over raw document ids 0..hits rather
   * than over the ids of the matching hits, so it dumps the first N
   * documents of the index, not the search results -- confirm intent.
   * Also, {@code i <= max_hits} admits MAX_HITS + 1 documents.
   */
  public static void writeResults(String queryStr, IndexReader reader,
      int hits) {
    String sFileName = FileUtils.createFilename(queryStr);
    int max_hits = getMAX_HITS();
    CSVWriter writer = null;
    try {
      writer = new CSVWriter(new FileWriter(sFileName), '\t');
      Collection<String> fieldNames = reader.getFieldNames(IndexReader.FieldOption.ALL);
      writer.writeNext(fieldNames.toArray(new String[fieldNames.size()]));
      for (int i = 0; i < hits && i <= max_hits; i++) {
        Document doc = reader.document(i);
        List<String> vals = new ArrayList<String>();
        for ( String name: fieldNames ) {
          vals.add(StringEscapeUtils
              .escapeJava(doc.getFieldable(name).stringValue()));
        }
        writer.writeNext(vals.toArray(new String[vals.size()]));
      }
      writer.flush();
    } catch (IOException e) {
      System.out.println("Cannot write to file");
      e.printStackTrace();
    } finally {
      try {
        if ( writer != null )
          writer.close();
      } catch (IOException e) {
        System.out.println("Cannot close writer");
        e.printStackTrace();
      }
    }

  }

  /** Prints the paging menu, hiding options that make no sense at the current position. */
  public static void printCommandline(int start, int hitsPerPage,
      int numTotalHits) {
    System.out.print("Press ");
    if (start - hitsPerPage >= 0) {
      System.out.print("(p)revious page, ");
    }
    if (start + hitsPerPage < numTotalHits) {
      System.out.print("(n)ext page, (w)rite to file, ");
    }
    System.out.println("(q)uit or enter number to jump to a page.");
  }

  /**
   * This demonstrates a typical paging search scenario, where the search
   * engine presents pages of size n to the user. The user can then go to the
   * next page if interested in the next hits.
   *
   * When the query is executed for the first time, then only enough results
   * are collected to fill 5 result pages. If the user wants to page beyond
   * this limit, then the query is executed another time and all hits are
   * collected.
   *
   * NOTE(review): despite the menu text, only 'x' or an empty line actually
   * exits; 'q' merely leaves the menu loop and redisplays the current page.
   */
  public static void doPagingSearch(BufferedReader in,
      IndexSearcher searcher, Query query, int hitsPerPage, boolean raw, boolean exact,
      String queryStr, boolean interactive) throws IOException {
    // Collect enough docs to show 5 pages
    TopDocs results = searcher.search(query, 5 * hitsPerPage);
    ScoreDoc[] hits = results.scoreDocs;

    int numTotalHits = results.totalHits;
    System.out.println(numTotalHits + " total matching documents");

    int start = 0;
    int end = Math.min(numTotalHits, hitsPerPage);
    boolean quit = false;

    while (true) {
      // Paging past the collected window: offer to re-run and fetch all hits.
      if (end > hits.length) {
        System.out
            .println("Only results 1 - " + hits.length + " of "
                + numTotalHits
                + " total matching documents collected.");
        System.out.println("Collect more (y/n) ?");
        String line = in.readLine();
        if (line.length() == 0 || line.charAt(0) == 'n') {
          break;
        }

        hits = searcher.search(query, numTotalHits).scoreDocs;
      }

      end = Math.min(hits.length, start + hitsPerPage);

      for (int i = start; i < end; i++) {

        if ( exact ) {
          // check if the document really contains the query string
          // FIXME: doesn't work for any complex ("field:value" type) query
          if (searcher.doc(hits[i].doc).getField("added").stringValue().indexOf(queryStr) < 0 ) {
            continue;
          }
        }

        if (raw) { // output raw format
          System.out.println("doc=" + hits[i].doc + " score="
              + hits[i].score);
          continue;
        }

        // Tabular output: one heading row (first hit only), then one
        // tab-separated row of truncated, escaped field values per hit.
        Document doc = searcher.doc(hits[i].doc);
        String key = doc.get(searchKey);
        List<Fieldable> fields = doc.getFields();
        Iterator<Fieldable> it = fields.iterator();
        if (key != null) {
          if (i == 0) {
            printResultHeading(fields);
          }
          System.out.print((i + 1) + ".\t" + hits[i].score + "\t");
          while (it.hasNext()) {
            Fieldable field = it.next();
            System.out.print(StringEscapeUtils
                .escapeJava(truncateString(field.stringValue(),
                    100))
                + "\t");
          }
          // System.out.println((i + 1) + ". " + key);
          System.out.println();
        } else {
          System.out.println((i + 1) + ". " + "No key (" + searchKey
              + ") for this document");
        }

      }

      if (!interactive || end == 0) {
        break;
      }

      // Menu loop: repeat until the user picks an action that moves the window.
      if (numTotalHits >= end) {
        while (true) {

          printCommandline(start, hitsPerPage, numTotalHits);

          String line = in.readLine();
          if (line.length() == 0 || line.charAt(0) == 'x') {
            quit = true;
            break;
          }
          if (line.charAt(0) == 'p') {
            start = Math.max(0, start - hitsPerPage);
            break;
          } else if (line.charAt(0) == 'q') {
            break;
          } else if (line.charAt(0) == 'n') {
            if (start + hitsPerPage < numTotalHits) {
              start += hitsPerPage;
            }
            break;
          } else if (line.charAt(0) == 'w') {
            // Dump to file and stay in the menu (no break).
            writeResults(query.toString(), searcher.getIndexReader(), results.totalHits);

          } else {
            // Anything else is treated as a 1-based page number.
            try {
              int page = Integer.parseInt(line);
              if ((page - 1) * hitsPerPage < numTotalHits) {
                start = (page - 1) * hitsPerPage;
                break;
              }
            } catch (java.lang.NumberFormatException e) {
              System.out.println("No such page");
            }
          }
        }
        end = Math.min(numTotalHits, start + hitsPerPage);
      }
      if (quit) {
        System.exit(0);
      }
    }
  }
}
-/*
- * Local variables: tab-width: 2 c-basic-offset: 2 indent-tabs-mode: t End:
- */
View
141 src/main/java/org/wikimedia/revdiffsearch/analysis/Dataset.java
@@ -1,141 +0,0 @@
-package org.wikimedia.revdiffsearch.analysis;
-
-import java.io.FileWriter;
-import java.io.IOException;
-import java.text.ParseException;
-import java.text.SimpleDateFormat;
-import java.util.Calendar;
-import java.util.Date;
-import java.util.HashMap;
-import java.util.Set;
-import java.util.TimeZone;
-
-import org.apache.commons.lang3.mutable.MutableInt;
-import org.wikimedia.revdiffsearch.utils.FileUtils;
-
-public class Dataset {
- /*
- * The key of the hashmap is the year and the second key is the month.
- *
- *
- */
- static String name;
- static String pattern = "yyyy-MM-dd'T'HH:mm:ss";
- static Calendar calendar;
- static HashMap<Integer, HashMap<Integer, MutableInt>> container = new HashMap<Integer, HashMap<Integer, MutableInt>>();
-
- public Dataset(String dataset_name) {
- name= dataset_name;
- calendar = getCalendar();
- }
-
- public boolean containsYear(int year) {
- if (container.containsKey(year)) {
- return true;
- }
- return false;
- }
-
- public boolean containsMonth(int year, int month) {
- HashMap<Integer, MutableInt> result = container.get(year);
- if (result.containsKey(month)) {
- return true;
- }
- return false;
- }
-
- public static Date convertTimestamptoDate(String timestamp) {
- Date date = null;
-
- SimpleDateFormat sdf = new SimpleDateFormat(pattern);
- try {
- date = sdf.parse(timestamp);
- System.out.println(date.toString());
- } catch (ParseException e) {
- System.out.println("Could not parse " + timestamp.toString());
- }
- return date;
- }
-
- private static Calendar getCalendar() {
- TimeZone tz = TimeZone.getTimeZone("UTC");
- Calendar calendar = Calendar.getInstance(tz);
- return calendar;
- }
-
-
- private static int getComponentFromDate(Date date, String key) {
- int result = 0;
- calendar.setTime(date);
- if (key == "year") {
- result = calendar.get(Calendar.YEAR);
- } else if (key=="month"){
- //ARRGGHHH.. January is 0, so we need to add +1
- result = calendar.get(Calendar.MONTH)+1;
- } else if (key=="day"){
- result = calendar.get(Calendar.DAY_OF_MONTH);
- } else {
- System.out.println(key.toString() + " is an invalid key, please choose from year, month or day");
- }
- return result;
- }
-
-// private static int getMonthFromDate(Date date) {
-// int result = -1;
-// if (date != null) {
-// //Calendar calendar = getCalendar();
-// calendar.setTime(date);
-// result = calendar.get(Calendar.MONTH);
-// }
-// return result;
-// }
-//
-// private static int getYearFromDate(Date date) {
-// int result = -1;
-// if (date != null) {
-// //Calendar calendar = getCalendar();
-// calendar.setTime(date);
-// result = calendar.get(Calendar.YEAR);
-// }
-// return result;
-// }
-
- public void addDate(String timestamp) {
- Date date = convertTimestamptoDate(timestamp);
- int year = getComponentFromDate(date, "year");
- int month = getComponentFromDate(date, "month");
- if (!container.containsKey(year)) {
- container.put(year, new HashMap<Integer, MutableInt>());
- }
-
- if (!container.get(year).containsKey(month)) {
- MutableInt count = new MutableInt();
- container.get(year).put(month, count);
- }
-
- incrementObs(year, month);
- }
-
- private void incrementObs(int year, int month) {
- MutableInt count = container.get(year).get(month);
- count.increment();
- container.get(year).put(month, count);
- }
-
- public static void writeDataset() throws IOException {
- String filename = FileUtils.createFilename(name);
- FileWriter fstream = new FileWriter(filename);
-
- Set<Integer> years = container.keySet();
- for (int year: years) {
- Set<Integer> months = container.get(year).keySet();
- for (int month: months) {
- MutableInt count = container.get(year).get(month);
- String row = String.format("%s\t%s\t%s\n", year, month, count.toString());
- fstream.write(row);
- }
- }
-
- fstream.close();
- }
-}
View
39 src/test/java/org/wikimedia/revdiffsearch/analysis/TestDataset.java
@@ -1,39 +0,0 @@
-package org.wikimedia.revdiffsearch.analysis;
-
-import static org.junit.Assert.*;
-
-import java.util.Date;
-
-import org.junit.Before;
-import org.junit.Test;
-
/** JUnit 4 tests for {@link Dataset}. */
public class TestDataset {

  private Dataset dataset;

  /**
   * Fresh dataset per test.
   * NOTE(review): Dataset keeps its counts in static state, so data leaks
   * between tests; harmless for these assertions but fragile.
   */
  @Before
  public void setUp() throws Exception {
    dataset = new Dataset("foo");
  }

  /** Parsing a MediaWiki-style timestamp yields a java.util.Date. */
  @Test
  public void testConvertTimestamptoDate() {
    String timestamp = "2009-10-19T10:47:34Z";
    Date date = Dataset.convertTimestamptoDate(timestamp);
    assertEquals(Date.class, date.getClass());
  }

  /** Adding a timestamp registers its year and its 1-based month. */
  @Test
  public void testAddDate() {
    String timestamp = "2009-10-19T10:47:34Z";
    dataset.addDate(timestamp);
    assertEquals("Result", true, dataset.containsYear(2009));
    assertEquals("Result", true, dataset.containsMonth(2009, 10));
  }

  /** Placeholder: writeDataset() has no coverage yet. */
  @Test
  public void testWriteDataset() {
    System.err.println("Not yet implemented");
  }

}
View
3 template_query.py
@@ -18,7 +18,6 @@
import copy
import re
import wmf
-import lcp
import query as query_func
from wmf.dump.iterator import Iterator
@@ -102,8 +101,6 @@ def escape_variables(text):
revs = [x for x in revs]
for rev in revs:
texts.append(rev.getText().encode('utf-8'))
- # cs = lcp.common_substring(texts, len(texts) * 0.9, 20)
- # print cs
# missed = 0
for ((revPrev,timePrev), (revNext,timeNext)) in gen_prev_next([(x, x.getTimestamp()) for x in revs], (None, int(time.time()))):
View
23 web/client.py
@@ -1,23 +0,0 @@
import socket
import sys
import cPickle

import settings

# Minimal smoke-test client for the Lucene search server (web/server.py):
# sends one query string over TCP and unpickles the server's reply.

# Create a socket (SOCK_STREAM means a TCP socket)
sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)

try:
    # Connect to server and send data
    data = '0.1'  # the query to send; '0.1' is just an arbitrary test term
    sock.connect((settings.HOST, settings.PORT))
    sock.send(data)

    # Receive data from the server and shut down
    # NOTE(review): a single recv(1024) assumes the whole pickled reply fits
    # in one read -- larger responses would be truncated and fail to unpickle.
    received = sock.recv(1024)
    result = cPickle.loads(received)
finally:
    sock.close()

print "Sent: {}".format(data)
print "Received: {}".format(result)
View
115 web/search.py
@@ -1,115 +0,0 @@
-import os
-import socket
-import sys
-import cPickle
-import json
-
-import web
-from web import form
-
-from mako.template import Template
-from mako.runtime import Context
-from mako.lookup import TemplateLookup
-
-from mimerender import mimerender
-
-import settings
-
# web.py URL routing: every path is handled by the ``index`` class.
urls = (
'/', 'index'
)


if settings.DEBUG:
    web.config.debug = True

# NOTE(review): this first TemplateLookup is immediately shadowed by the
# absolute-path lookup a few lines below and is effectively dead code.
lookup = TemplateLookup(directories=['templates/'])
# NOTE(review): settings defines lowercase ``hostname``; ``settings.HOSTNAME``
# looks like an AttributeError at import time -- confirm.
if settings.HOSTNAME == 'production':
    # Production: expose a WSGI callable for the front-end server.
    application = web.application(urls, globals()).wsgifunc()
else:
    # Development: stand-alone web.py server, started from __main__.
    # NOTE(review): ``app`` is unbound on the production branch, so the
    # ``if app:`` guard in __main__ would raise NameError there.
    app = web.application(urls, globals())

lookup = TemplateLookup(directories=[os.path.join(os.path.dirname(__file__),'templates')])
# Renderers for mimerender content negotiation in index.POST.
render_json = lambda **args: json.dumps(args)
render_html = lambda message: message
-
def serve_template(templatename, **kwargs):
    """Render the named Mako template with the given context variables."""
    return lookup.get_template(templatename).render(**kwargs)
-
-
-class index:
- def __init__(self, *args, **kwargs):
- self.links= {'rev_id':'w/index.php?diff=',
- 'title':'wiki/',
- 'user_text':'wiki/User:'}
-
- def searchform(self):
- search = form.Form(
- form.Textarea('query', form.notnull),
- form.Button('Search!')
- )
- return search
-
- def GET(self):
- search = self.searchform()
- return serve_template('index.html',form=search)
-
- @mimerender(
- default= 'html',
- html = render_html,
- #json = render_json,
- )
- def POST(self):
- search = self.searchform()
- if not search.validates():
- return serve_template('index.html',form=search)
- else:
- query_str = search['query'].value
- results = self.fetch_results(query_str)
- headings = self.extract_headings(results)
- if settings.DEBUG:
- print results
- return serve_template('results.html',query_str=query_str, results=results, headings=headings, form=search)
-
-
- def extract_headings(self, results):
- return results.pop('headings')
-
-
- def fetch_results(self, query_str):
- # Create a socket (SOCK_STREAM means a TCP socket)
- sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
- try:
- # Connect to server and send data
- sock.connect((settings.HOST, settings.PORT))
- sock.send(query_str)
-
- # Receive data from the server and shut down
- buffer = cStringIO.StringIO()
- buffer.write(sock.recv(4096))
- done = False
- while not done:
- more = sock.recv(4096)
- if not more:
- done = True
- else:
- buffer.write(more)
- #print buffer.getvalue()
- results = cPickle.loads(buffer.getvalue())
- except Exception,e:
- print e
- resuls = e
- finally:
- sock.close()
-
- return results
-
- #print "Sent: {}".format(query_str)
- #print "Received: {}".format(result)
-
-
if __name__ == '__main__':
    # Development entry point: run web.py's built-in server.
    # NOTE(review): ``app`` is only bound on the non-production branch of
    # the module init; on production this raises NameError instead of
    # skipping -- confirm intended.
    if app:
        app.run()
-
View
62 web/server.py
@@ -1,62 +0,0 @@
-import SocketServer
-import os
-import cPickle
-
-import settings
-from lucene import StandardAnalyzer, File, QueryParser, Version, SimpleFSDirectory, File, IndexSearcher, initVM
-
-
# Start the JVM and open the Lucene index once at import time; the
# resulting searcher is shared by every request handler instance.
vm = initVM()
index_dir = SimpleFSDirectory(File(settings.INDEX_DIR))
searcher = IndexSearcher(index_dir)
-
class LuceneServer(SocketServer.BaseRequestHandler):
    """
    The RequestHandler class for our server.

    It is instantiated once per connection to the server, and must
    override the handle() method to implement communication to the
    client.
    """

    def serialize(self, hits):
        """Pickle the hits as {'headings': [...], doc_id: {'score': ..., 'contents': ...}}."""
        results = {}
        results['headings'] = ['score', 'contents'] #hardcoded not ideal
        for hit in hits.scoreDocs:
            doc = searcher.doc(hit.doc)
            #print dir(doc)
            #print doc.getFields(), doc.getValues("contents")
            #print doc, doc.toString()
            # NOTE(review): the next call discards its result -- it looks
            # like leftover debugging and has no effect.
            doc.get('contents') #.encode("utf-8")
            results[hit.doc] = {}
            results[hit.doc]['score'] = hit.score
            results[hit.doc]['contents'] = doc.get('contents')
        return cPickle.dumps(results)

    def handle(self):
        """Read one query string from the socket, run it, reply with pickled hits."""
        # self.request is the TCP socket connected to the client
        # self.rfile is a file-like object created by the handler;
        # we can now use e.g. readline() instead of raw recv() calls
        # NOTE(review): a single recv(1024) caps the query at 1024 bytes.
        self.data = self.request.recv(1024).strip()
        #print "{} wrote:".format(self.client_address[0])
        #print self.data
        # just send back the same data, but upper-cased

        MAX = 50  # maximum number of hits returned per query
        analyzer = StandardAnalyzer(Version.LUCENE_34)
        # Escape the raw input so Lucene query syntax cannot break parsing.
        self.data = QueryParser.escape(self.data)
        query = QueryParser(Version.LUCENE_34, 'contents', analyzer).parse(self.data)

        hits = searcher.search(query, MAX)
        if settings.DEBUG:
            print "Found %d document(s) that matched query '%s':" % (hits.totalHits, query)
        serialized = self.serialize(hits)
        self.request.send(serialized)
-
-
if __name__ == "__main__":
    # Create the server, binding to the host/port from settings
    # (localhost:9999 by default).
    server = SocketServer.TCPServer((settings.HOST, settings.PORT), LuceneServer)
    # Activate the server; this will keep running until you
    # interrupt the program with Ctrl-C
    server.serve_forever()
View
13 web/settings.py
@@ -1,13 +0,0 @@
import socket
import os

# Deployment settings for the DiffSearcher web front-end and the Lucene
# TCP search server, selected by the machine's hostname.
hostname = socket.gethostname()
# Address the search server binds and clients connect to.
HOST, PORT = "localhost", 9999

# NOTE(review): web/search.py reads ``settings.HOSTNAME`` (upper case),
# which is never defined here -- confirm which spelling is intended.
if hostname == 'alpha':
    # 'alpha' is the production box; index lives on the data volume.
    hostname = 'production'
    INDEX_DIR = os.path.join('/','data-large', 'lucene', 'index')
    DEBUG=False
else:
    # Any other host is assumed to be a Windows development machine.
    INDEX_DIR = os.path.join('c:\\','lucene-3.4.0','index')
    DEBUG=True
View
13 web/templates/base.html
@@ -1,13 +0,0 @@
# -*- coding: utf-8 -*-
## Mako base layout: child templates inherit from this file and supply a
## head_tags() def plus a body.
## NOTE(review): the coding line above is plain template text in Mako and
## is emitted into the rendered page verbatim -- confirm intended.
<html>
  <head>
    ${self.head_tags()}
  </head>
  <body>
    ${self.body()}

    <div class="footer">
      The DiffIndexer and the DiffSearcher are written by <a href="http://www.yusuke.matsubara.name/">Yusuke Matsubara</a> and <a href="http://meta.wikimedia.org/wiki/User:Drdee">Diederik van Liere</a>
    </div>
  </body>
</html>
View
12 web/templates/index.html
@@ -1,12 +0,0 @@
# -*- coding: utf-8 -*-
<%inherit file="base.html" />
## Search landing page: page title plus the shared search form.

<%def name="head_tags()">

  <title>DiffSearchers::search</title>

</%def>

<h1>DiffSearcher: the ultimate Wiki blame tool</h1>

<%include file="searchform.html" />
View
40 web/templates/results.html
@@ -1,40 +0,0 @@
# -*- coding: utf-8 -*-
<%inherit file="base.html" />
## Results page: one table row per hit with a cell per heading; values whose
## heading appears in ``links`` are turned into links to en.wikipedia.org.
## NOTE(review): ``links`` is defined on the index handler (self.links) but
## index.POST does not pass it into the template context -- confirm this
## template actually receives it.

<%def name="head_tags()">

  <title>DiffSearcher::results</title>

</%def>

<h1>You were searching for ${query_str}...</h1>

% if results:
  <table>
    <tr>
## Heading row.
    % for heading in headings:
      <td class="even"><h3>${heading}</h3></td>
    % endfor
    </tr>

## Zebra-stripe data rows by alternating the row class.
  % for x, hit in enumerate(results):
    % if x % 2 == 1:
      <tr class="even">
    % else:
      <tr class="odd">
    % endif
    % for heading in headings:
      % if heading in links:
        <td><a href="http://en.wikipedia.org/${links[heading]}${results[hit][heading]}">${results[hit][heading]}</a></td>
      % else:
        <td>${results[hit][heading]}</td>
      % endif
    % endfor
    </tr>
  % endfor
  </table>
% else:
<p> There were 0 results for ${query_str}.
% endif

<%include file="searchform.html" />
View
59 web/templates/searchform.html
@@ -1,59 +0,0 @@
-
-
-
## Shared search form plus inline help, included by index.html and
## results.html.  ``form`` is the web.py Form passed in by the handler.
  <form name="search" method="post">
    ${form.render()}
  </form>


<h2>Constructing queries in Lucene</h2>
<p>
You have the full Lucene query power at your disposal. The easiest query is:
  <pre>
  foo
  </pre>
This will return all revisions where the word foo was either added or removed.
</p>

<p>
You can limit your results to certain namespaces:
  <pre>
  foo namespace:3
  </pre>
This will return all revisions where the word foo was either added or removed in namespace 3.
</p>

<p>
You can search for a word in a particular field (see Available fields and as long as Field.Index says Analyzed):
  <pre>
  title:foo namespace:4
  </pre>
</p>


<h2>Available fields</h2>
<p>
<ul>
  <li>rev_id, Field.Index.NOT_ANALYZED</li>
  <li>page_id, Field.Index.NOT_ANALYZED</li>
  <li>namespace, Field.Index.NOT_ANALYZED</li>
  <li>title, Field.Index.ANALYZED</li>
  <li>timestamp, Field.Index.NOT_ANALYZED</li>
  <li>minor, Field.Index.NOT_ANALYZED</li>
  <li>user_id, Field.Index.NOT_ANALYZED</li>
  <li>user_text, Field.Index.ANALYZED</li>
</ul>
</p>


<h2>Next features</h2>
<ul>
<li> limit query to date range</li>

</ul>

<h2>Limitations</h2>
<ul>
<li> You cannot search the comments at this moment. </li>
<li> You cannot limit a query to certain users</li>
</ul>

0 comments on commit 75cbd15

Please sign in to comment.