Skip to content

Commit

Permalink
*) Redesign of parser configuration
Browse files Browse the repository at this point in the history
   - restructuring of mimeTypes based on the parsers
   - displaying parser usage count
   - displaying human readable parser names
   - displaying parser version information

*) httpdFileHandler.java
   - adding possibility to support "streaming" servlets,
     which are special servlets that can communicate
     autonomously with the client via the connection streams
   - the name of these new servlet types must end with the 
     file extension .stream
   - this feature will be needed by the yacy ScreenSaver
     class to fetch statistic data from the peer without the
     need to reconnect to the server all the time

*) Adding human readable names and version information for
   all supported parsers

*) plasmaParser.java
   - adding new structure to store parser statistic data

*) Adding openDocument parser
   - can be used to parse odt files

*) jmimemagic
   - adding rules to detect openDocument formats properly

*) serverLog.java
   - adding functions that can be used to query if a given
     logging level is enabled or not.


git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@1140 6c8d7289-2bf4-0310-a012-ef5d649a1542
  • Loading branch information
theli committed Nov 29, 2005
1 parent 3037c1e commit bdf3011
Show file tree
Hide file tree
Showing 24 changed files with 735 additions and 166 deletions.
21 changes: 14 additions & 7 deletions htroot/Settings_Parser.inc
Expand Up @@ -7,23 +7,30 @@ For a detailed description of the various MIME-types take a look at <a href="htt
<tr class="TableHeader" valign="bottom">
<td class="small" >Activate</td>
<td class="small" >Mime-Type</td>
<td class="small" >Parser&nbsp;Class&nbsp;Name</td>
<td class="small" >Parser&nbsp;Usage</td>
<td class="small" ></td>
</tr>
#{parser}#
<tr class="TableCellDark">
<td class="small" align="center"><input type="checkbox" name="#[mime]#" align="top" #(status)#::checked#(/status)#></td>
<td class="small" >#[mime]#</td>
<td class="small" title="Full qualified name: #[name]#">#[shortname]#</td>
<td colspan="2">#[name]# V#[version]#</td>
<td>#[usage]#</td>
<td>&nbsp;</td>
</tr>
#{mime}#
<tr class="TableCellLight">
<td class="small" align="center"><input type="checkbox" name="#[mimetype]#" align="top" #(status)#::checked#(/status)#></td>
<td class="small">#[mimetype]#</td>
<td class="small">&nbsp;</td>
<td class="small" width="100%"></td>
</tr>
</tr>
#{/mime}#
#{/parser}#
<tr class="TableCellLight">
<tr class="TableCellDark">
<td class="small" align="center"><input type="checkbox" name="allParserEnabled" align="top" #(allParserEnabled)#::checked#(/allParserEnabled)#>
<td colspan="2" class="small" >Enable all parsers</td>
<td class="small">&nbsp;</td>
</tr>
<tr class="TableCellLight">
<tr class="TableCellDark">
<td colspan="4" class="small" ><input type="submit" name="parserSettings" value="submit">&nbsp;Changes take effect immediately</td>
</tr>
</table>
Expand Down
48 changes: 32 additions & 16 deletions htroot/Settings_p.java
Expand Up @@ -45,13 +45,16 @@

import java.util.Arrays;
import java.util.Collections;
import java.util.Enumeration;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Hashtable;
import java.util.Iterator;
import java.util.List;

import de.anomic.http.httpHeader;
import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.plasma.parser.ParserInfo;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
import de.anomic.yacy.yacyCore;
Expand Down Expand Up @@ -249,29 +252,42 @@ else if (page.equals("parser")) {
* Parser Configuration
*/
plasmaSwitchboard sb = (plasmaSwitchboard)env;
Hashtable enabledParsers = sb.parser.getEnabledParserList();
Hashtable availableParsers = sb.parser.getAvailableParserList();
HashSet enabledParsers = sb.parser.getEnabledParserList();
HashSet parserInfos = new HashSet(sb.parser.getAvailableParserList().values());

// fetching a list of all available mimetypes
List availableParserKeys = Arrays.asList(availableParsers.keySet().toArray(new String[availableParsers.size()]));

// sort it
Collections.sort(availableParserKeys);
// // fetching a list of all available mimetypes
// List availableParserKeys = Arrays.asList(availableParsers.entrySet().toArray(new ParserInfo[availableParsers.size()]));
//
// // sort it
// Collections.sort(availableParserKeys);

// loop through the mimeTypes and add it to the properties
boolean allParsersEnabled = true;
int parserIdx = 0;
Iterator availableParserIter = availableParserKeys.iterator();

Iterator availableParserIter = parserInfos.iterator();
while (availableParserIter.hasNext()) {
String mimeType = (String) availableParserIter.next();
String parserName = (String) availableParsers.get(mimeType);
boolean parserIsEnabled = enabledParsers.containsKey(mimeType);
ParserInfo parserInfo = (ParserInfo) availableParserIter.next();
prop.put("parser_" + parserIdx + "_name", parserInfo.parserName);
prop.put("parser_" + parserIdx + "_version", parserInfo.parserVersionNr);
prop.put("parser_" + parserIdx + "_usage", Integer.toString(parserInfo.usageCount));

prop.put("parser_" + parserIdx + "_mime", mimeType);
prop.put("parser_" + parserIdx + "_name", parserName);
prop.put("parser_" + parserIdx + "_shortname", parserName.substring(parserName.lastIndexOf(".")+1));
prop.put("parser_" + parserIdx + "_status", parserIsEnabled ? 1:0);
allParsersEnabled &= parserIsEnabled;
int mimeIdx = 0;
Enumeration mimeTypeIter = parserInfo.supportedMimeTypes.keys();
while (mimeTypeIter.hasMoreElements()) {
String mimeType = (String)mimeTypeIter.nextElement();

boolean parserIsEnabled = enabledParsers.contains(mimeType);

prop.put("parser_" + parserIdx + "_mime_" + mimeIdx + "_mimetype", mimeType);
//prop.put("parser_" + parserIdx + "_name", parserName);
//prop.put("parser_" + parserIdx + "_shortname", parserName.substring(parserName.lastIndexOf(".")+1));
prop.put("parser_" + parserIdx + "_mime_" + mimeIdx + "_status", enabledParsers.contains(mimeType) ? 1:0);
allParsersEnabled &= parserIsEnabled;

mimeIdx++;
}
prop.put("parser_" + parserIdx + "_mime", mimeIdx);

parserIdx++;
}
Expand Down
2 changes: 2 additions & 0 deletions httpd.mime
Expand Up @@ -37,6 +37,7 @@ mov = video/quicktime
mpe = video/mpeg
mpeg = video/mpeg
mpg = video/mpeg
odt = application/vnd.oasis.opendocument.text
ogg = audio/ogg-vorbis
pac = application/x-ns-proxy-autoconfig
pdf = application/pdf
Expand Down Expand Up @@ -70,6 +71,7 @@ tif = image/tiff
tiff = image/tiff
torrent = application/x-bittorrent
txt = text/plain
vcf = text/x-vcard
wav = audio/x-wav
xhtml = application/xhtml+xml
xla = application/msexcel
Expand Down
Binary file modified libx/jmimemagic-0.0.4a.jar
Binary file not shown.
Binary file added libx/odf_utils_05_11_10.jar
Binary file not shown.
25 changes: 20 additions & 5 deletions source/de/anomic/http/httpdFileHandler.java
Expand Up @@ -346,7 +346,7 @@ public void doResponse(Properties conProp, httpHeader requestHeader, OutputStrea
int argc;
if (argsString == null) {
// no args here, maybe a POST with multipart extension
int length;
int length = 0;
//System.out.println("HEADER: " + requestHeader.toString()); // DEBUG
if (method.equals(httpHeader.METHOD_POST)) {

Expand All @@ -356,10 +356,11 @@ public void doResponse(Properties conProp, httpHeader requestHeader, OutputStrea
} else if (requestHeader.gzip()) {
length = -1;
gzipBody = new GZIPInputStream(body);
} else {
httpd.sendRespondError(conProp,out,4,403,null,"bad post values",null);
return;
}
// } else {
// httpd.sendRespondError(conProp,out,4,403,null,"bad post values",null);
// return;
// }

// if its a POST, it can be either multipart or as args in the body
if ((requestHeader.containsKey(httpHeader.CONTENT_TYPE)) &&
Expand Down Expand Up @@ -438,7 +439,7 @@ public void doResponse(Properties conProp, httpHeader requestHeader, OutputStrea
}
}else{
//you cannot share a .png/.gif file with a name like a class in htroot.
if ( !(targetFile.exists()) && !((path.endsWith("png")||path.endsWith("gif"))&&targetClass!=null ) ){
if ( !(targetFile.exists()) && !((path.endsWith("png")||path.endsWith("gif")||path.endsWith(".stream"))&&targetClass!=null ) ){
targetFile = new File(htDocsPath, path);
targetClass = rewriteClassFile(new File(htDocsPath, path));
}
Expand Down Expand Up @@ -486,6 +487,20 @@ public void doResponse(Properties conProp, httpHeader requestHeader, OutputStrea
Thread.currentThread().sleep(200); // see below
serverFileUtils.write(result, out);
}
} else if ((targetClass != null) && (path.endsWith(".stream"))) {
// call rewrite-class
requestHeader.put("CLIENTIP", conProp.getProperty("CLIENTIP"));
requestHeader.put("PATH", path);
requestHeader.put("INPUTSTREAM", body);
requestHeader.put("OUTPUTSTREAM", out);

httpd.sendRespondHeader(this.connectionProperties, out, httpVersion, 200, null);

// in case that there are no args given, args = null or empty hashmap
serverObjects tp = (serverObjects) rewriteMethod(targetClass).invoke(null, new Object[] {requestHeader, args, switchboard});

this.forceConnectionClose();
return;
} else if ((targetFile.exists()) && (targetFile.canRead())) {
// we have found a file that can be written to the client
// if this file uses templates, then we use the template
Expand Down
24 changes: 24 additions & 0 deletions source/de/anomic/plasma/parser/AbstractParser.java
Expand Up @@ -73,6 +73,16 @@ public abstract class AbstractParser implements Parser{
* purposes.
*/
protected serverLog theLogger = null;

/**
* Version number of the parser
*/
protected String parserVersionNr = "0.1";

/**
* Parser name
*/
protected String parserName = this.getClass().getSimpleName();

/**
* The Constructor of this class.
Expand Down Expand Up @@ -165,4 +175,18 @@ public void setLogger(serverLog log) {
this.theLogger = log;
}

/**
 * Reports the version of this parser as stored in
 * {@link #parserVersionNr}.
 *
 * @return the parser version number
 */
public String getVersion() {
return parserVersionNr;
}

/**
 * Reports the name of this parser as stored in {@link #parserName}.
 *
 * @return the parser name
 */
public String getName() {
return this.parserName;
}
}
13 changes: 13 additions & 0 deletions source/de/anomic/plasma/parser/Parser.java
Expand Up @@ -122,4 +122,17 @@ public plasmaParserDocument parse(URL location, String mimeType, InputStream sou
*/
public void setLogger(serverLog log);

/**
 * Returns the version number of the current parser.
 * @return parser version number
 */
public String getVersion();

/**
 * Returns the human readable name of the parser.
 * @return parser name
 */
public String getName();
}


34 changes: 34 additions & 0 deletions source/de/anomic/plasma/parser/ParserInfo.java
@@ -0,0 +1,34 @@
package de.anomic.plasma.parser;

import java.util.Hashtable;

/**
 * Simple data holder bundling the descriptive information, the
 * properties and the usage counter of a single parser.
 */
public class ParserInfo {
    // general parser info
    public Class parserClass;
    public String parserClassName;

    // name and version as printed by toString()
    public String parserName;
    public String parserVersionNr;

    // parser properties
    public String[] libxDependencies;
    public Hashtable supportedMimeTypes;

    // usage statistic, incremented via incUsageCounter()
    public int usageCount = 0;

    /**
     * Builds a one-line description of the form
     * <code>name Vversion | className | supportedMimeTypes</code>.
     * A missing version number is printed as "0.0".
     *
     * @return the textual description of this parser info
     */
    public String toString() {
        String version = this.parserVersionNr;
        if (version == null) {
            version = "0.0";
        }

        return this.parserName + " V" + version
                + " | " + this.parserClassName
                + " | " + this.supportedMimeTypes;
    }

    /**
     * Increases the usage counter by one while holding the monitor
     * of this object.
     */
    public synchronized void incUsageCounter() {
        this.usageCount += 1;
    }
}
4 changes: 2 additions & 2 deletions source/de/anomic/plasma/parser/bzip/bzipParser.java
Expand Up @@ -77,9 +77,10 @@ public class bzipParser extends AbstractParser implements Parser {
private static final String[] LIBX_DEPENDENCIES = new String[] {
"bzip2.jar"
};

/**
 * Creates the parser: passes LIBX_DEPENDENCIES to the superclass
 * constructor and sets the parser name.
 */
public bzipParser() {
super(LIBX_DEPENDENCIES);
this.parserName = "Bzip 2 UNIX Compressed File Parser";
}

public Hashtable getSupportedMimeTypes() {
Expand All @@ -105,7 +106,6 @@ public plasmaParserDocument parse(URL location, String mimeType, InputStream sou

int read = 0;
byte[] data = new byte[1024];

CBZip2InputStream zippedContent = new CBZip2InputStream(source);

tempFile = File.createTempFile("bunzip","tmp");
Expand Down
1 change: 1 addition & 0 deletions source/de/anomic/plasma/parser/doc/docParser.java
Expand Up @@ -75,6 +75,7 @@ public class docParser

/**
 * Creates the parser: passes LIBX_DEPENDENCIES to the superclass
 * constructor and sets the parser name.
 */
public docParser() {
super(LIBX_DEPENDENCIES);
this.parserName = "Word Document Parser";
}

public plasmaParserDocument parse(URL location, String mimeType,
Expand Down
1 change: 1 addition & 0 deletions source/de/anomic/plasma/parser/gzip/gzipParser.java
Expand Up @@ -76,6 +76,7 @@ public class gzipParser extends AbstractParser implements Parser {

/**
 * Creates the parser: passes LIBX_DEPENDENCIES to the superclass
 * constructor and sets the parser name.
 */
public gzipParser() {
super(LIBX_DEPENDENCIES);
this.parserName = "GNU Zip Compressed Archive Parser";
}

public Hashtable getSupportedMimeTypes() {
Expand Down

0 comments on commit bdf3011

Please sign in to comment.