Skip to content

Commit

Permalink
*) Faster appearance of ConfigBasic by bypassing UPNP-scan in case of…
Browse files Browse the repository at this point in the history
… existing external connects

*) Marked two uses of deprecated or proprietary APIs in the source
*) Added the ability to dump the words seen during indexing to a file. The dump is disabled in its current form, so it should not affect performance.

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@3592 6c8d7289-2bf4-0310-a012-ef5d649a1542
  • Loading branch information
(no author) committed Apr 24, 2007
1 parent 657585f commit 4f4d3d7
Show file tree
Hide file tree
Showing 4 changed files with 23 additions and 2 deletions.
5 changes: 4 additions & 1 deletion htroot/ConfigBasic.java
Expand Up @@ -103,7 +103,10 @@ public static serverObjects respond(httpHeader header, serverObjects post, serve

// scan for Upnp routers
long begin = System.currentTimeMillis();
boolean upnpRouterFound = config.findUPnPRouter(3000);
boolean upnpRouterFound = false;
if (yacyCore.seedDB.mySeed.isVirgin() || yacyCore.seedDB.mySeed.isJunior()) {
upnpRouterFound = config.findUPnPRouter(3000);
}
long end = System.currentTimeMillis();

// if the upnp router scan has taken less than 3 sec, we need to wait
Expand Down
1 change: 1 addition & 0 deletions htroot/WatchCrawler_p.java
Expand Up @@ -266,6 +266,7 @@ public static serverObjects respond(httpHeader header, serverObjects post, serve
HashMap hyperlinks = (HashMap) scraper.getAnchors();

// creating a crawler profile
/* File.toURL() is deprecated as of Sun JRE 6; file.toURI().toURL() is the documented replacement */
plasmaCrawlProfile.entry profile = switchboard.profiles.newEntry(fileName, file.toURL().toString(), newcrawlingfilter, newcrawlingfilter, newcrawlingdepth, newcrawlingdepth, crawlingIfOlder, crawlingDomFilterDepth, crawlingDomMaxPages, crawlingQ, indexText, indexMedia, storeHTCache, true, crawlOrder, xsstopw, xdstopw, xpstopw);

// loop through the contained links
Expand Down
1 change: 1 addition & 0 deletions source/de/anomic/net/ftpc.java
Expand Up @@ -639,6 +639,7 @@ public boolean HASH() {
}

/**
 * Passes the result of {@code shift(cmd)} to {@code sun.tools.jar.Main.main}.
 *
 * @return always {@code true}
 */
public boolean JAR() {
    // sun.tools.jar is a Sun proprietary API and may be removed in a future Java release.
    // NOTE(review): the JDK's jar Main.main may terminate the JVM when it finishes -- confirm.
    final String[] jarArgs = shift(cmd);
    sun.tools.jar.Main.main(jarArgs);
    return true;
}
Expand Down
18 changes: 17 additions & 1 deletion source/de/anomic/plasma/plasmaCondenser.java
Expand Up @@ -52,6 +52,7 @@
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.RandomAccessFile;
import java.io.Reader;
import java.io.UnsupportedEncodingException;
import java.util.Enumeration;
Expand Down Expand Up @@ -390,6 +391,21 @@ private void createCondensement(InputStream is, String charset) throws Unsupport
word = (new String((StringBuffer) wordenum.nextElement())).toLowerCase(); // TODO: does toLowerCase work for non ISO-8859-1 chars?
//System.out.println("PARSED-WORD " + word);

//This is useful for testing what YaCy "sees" of a website.
if (false) {
File f = new File("dump.txt");
RandomAccessFile fa = null;
try {
fa = new RandomAccessFile(f, "rw");
fa.seek(fa.length());
fa.writeBytes(word);
fa.write(160);
fa.close();
} catch (IOException e) {
e.printStackTrace();
}
}

// distinguish punctuation and words
wordlen = word.length();
if ((wordlen == 1) && (htmlFilterContentScraper.punctuation(word.charAt(0)))) {
Expand Down Expand Up @@ -450,7 +466,7 @@ private void createCondensement(InputStream is, String charset) throws Unsupport
wordInSentenceCounter++;
}
}
// finnish last sentence
// finish last sentence
if (sentence.length() > 0) {
allsentencecounter++;
sentence.insert(0, "."); // append at beginning
Expand Down

0 comments on commit 4f4d3d7

Please sign in to comment.