Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
*) Adding first version of YaCy bookmarklet
- this can be used to easily crawl a webpage which is currently opened in the browser - to get the bookmarklet javascript simply call http://localhost:8000/QuickCrawlLink_p.html and drag and drop the link shown to your Browsers Toolbar/Link-Bar. git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@1053 6c8d7289-2bf4-0310-a012-ef5d649a1542
- Loading branch information
theli
committed
Nov 8, 2005
1 parent
7e0647f
commit 6f8d7d3
Showing
4 changed files
with
267 additions
and
3 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,69 @@ | ||
<html> | ||
<head> | ||
<title>YaCy '#[clientname]#': Quick Crawl Link</title> | ||
#[metas]# | ||
</head> | ||
<body marginheight="0" marginwidth="0" leftmargin="0" topmargin="0"> | ||
|
||
<table border="0" cellspacing="0" cellpadding="0" width="100%"> | ||
<tr><td> | ||
<table class="Heading" border="0" cellspacing="0" cellpadding="0" width="100%"> | ||
<tr height="36"> | ||
<td width="240"> | ||
<a href="http://www.yacy.net"> | ||
<img width="52" height="33" border="0" src="/env/grafics/yacy.gif" align="top"> | ||
</a> | ||
</td> | ||
<td width="50%" align="center"> | ||
<div class="Headline">#[clientname]#: Quick Crawl Link</div> | ||
</td> | ||
<td width="240" align="right"> | ||
</td> | ||
</tr> | ||
</table> | ||
</td></tr> | ||
</table> | ||
<br><br> | ||
|
||
#(mode)#<!-- 0 --> | ||
<table style="margin-left: auto; margin-right: auto;"> | ||
<tr> | ||
<td> | ||
<b>Quickly adding Bookmarks:</b> | ||
<p>Simply drag and drop the link shown below to your Browsers Toolbar/Link-Bar. | ||
If you click on it while browsing, the currently viewed website will be inserted into the YaCy crawling queue for indexing.</p> | ||
</td> | ||
</tr> | ||
<tr> | ||
<td><a href="javascript:w = window.open('http://#[host]#:#[port]#/QuickCrawlLink_p.html?localIndexing=on&crawlingQ=on&xdstopw=on&title='+escape(document.title)+'&url='+location.href,'_blank','height=150,width=500,resizable=yes,scrollbar=no,directory=no,menubar=no,location=no');w.focus();"><img src="/env/grafics/addlink.gif" border="0"> Crawl with YaCy</a></td></tr> | ||
</table> | ||
|
||
::<!-- 1 --> | ||
<table style="margin-left: auto; margin-right: auto;"> | ||
<tr> | ||
<td><b>Title:</b></td> | ||
<td>#[title]#</td> | ||
</tr> | ||
<tr> | ||
<td><b>Link:</b></td> | ||
<td><a href="#[url]#">#[url]#</a></td> | ||
</tr> | ||
<tr> | ||
<td><b>Status:</b></td> | ||
<td> | ||
#(status)#<!-- 0 --> | ||
<font color="green">URL successfully added to Crawler Queue</font> | ||
::<!-- 1 --> | ||
<font color="red">Malformed URL</font> | ||
::<!-- 2 --> | ||
<font color="red">Unable to create new crawling profile for URL: #[error]#</font> | ||
::<!-- 3 --> | ||
<font color="red">Unable to add URL to crawler queue: #[error]#</font> | ||
#(/status)# | ||
</td> | ||
</tr> | ||
</table> | ||
#(/mode)# | ||
|
||
</body> | ||
</html> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,188 @@ | ||
//QuickCrawlLink_p.java | ||
//----------------------- | ||
//part of the AnomicHTTPD caching proxy | ||
//(C) by Michael Peter Christen; mc@anomic.de | ||
//first published on http://www.anomic.de | ||
//Frankfurt, Germany, 2004 | ||
// | ||
//This file was contributed by Martin Thelian | ||
//$LastChangedDate$ | ||
//$LastChangedBy$ | ||
//$LastChangedRevision$ | ||
// | ||
//This program is free software; you can redistribute it and/or modify | ||
//it under the terms of the GNU General Public License as published by | ||
//the Free Software Foundation; either version 2 of the License, or | ||
//(at your option) any later version. | ||
// | ||
//This program is distributed in the hope that it will be useful, | ||
//but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
//MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
//GNU General Public License for more details. | ||
// | ||
//You should have received a copy of the GNU General Public License | ||
//along with this program; if not, write to the Free Software | ||
//Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
// | ||
//Using this software in any meaning (reading, learning, copying, compiling, | ||
//running) means that you agree that the Author(s) is (are) not responsible | ||
//for cost, loss of data or any harm that may be caused directly or indirectly | ||
//by usage of this softare or this documentation. The usage of this software | ||
//is on your own risk. The installation and usage (starting/running) of this | ||
//software may allow other people or application to access your computer and | ||
//any attached devices and is highly dependent on the configuration of the | ||
//software which must be done by the user of the software; the author(s) is | ||
//(are) also not responsible for proper configuration and usage of the | ||
//software, even if provoked by documentation provided together with | ||
//the software. | ||
// | ||
//Any changes to this file according to the GPL as documented in the file | ||
//gpl.txt aside this file in the shipment you received can be done to the | ||
//lines that follows this copyright notice here, but changes must not be | ||
//done inside the copyright notive above. A re-distribution must contain | ||
//the intact and unchanged copyright notice. | ||
//Contributions and changes to the program code must be marked as such. | ||
|
||
//You must compile this file with | ||
//javac -classpath .:../classes IndexCreate_p.java | ||
//if the shell's current path is HTROOT | ||
|
||
|
||
import java.net.MalformedURLException; | ||
import java.net.URL; | ||
import java.util.Date; | ||
|
||
import de.anomic.htmlFilter.htmlFilterContentScraper; | ||
import de.anomic.http.httpHeader; | ||
import de.anomic.plasma.plasmaCrawlProfile; | ||
import de.anomic.plasma.plasmaSwitchboard; | ||
import de.anomic.plasma.plasmaURL; | ||
import de.anomic.server.serverObjects; | ||
import de.anomic.server.serverSwitch; | ||
import de.anomic.yacy.yacyCore; | ||
|
||
public class QuickCrawlLink_p { | ||
|
||
/** | ||
* Example Javascript to call this servlet: | ||
* <code>javascript:w = window.open('http://user:pwd@localhost:8080/QuickCrawlLink_p.html?localIndexing=on&crawlingQ=on&xdstopw=on&title=' + escape(document.title) + '&url=' + location.href,'_blank','height=150,width=500,resizable=yes,scrollbar=no,directory=no,menubar=no,location=no'); w.focus();</code> | ||
* @param header | ||
* @param post | ||
* @param env | ||
* @return | ||
*/ | ||
public static serverObjects respond(httpHeader header, serverObjects post, serverSwitch env) { | ||
|
||
serverObjects prop = new serverObjects(); | ||
plasmaSwitchboard switchboard = (plasmaSwitchboard)env; | ||
|
||
if (post == null) { | ||
// send back usage example | ||
prop.put("mode",0); | ||
|
||
// getting the http host header | ||
String hostSocket = (String) header.get(httpHeader.CONNECTION_PROP_HOST); | ||
|
||
String host = hostSocket; | ||
int port = 80, pos = hostSocket.indexOf(":"); | ||
if (pos != -1) { | ||
port = Integer.parseInt(hostSocket.substring(pos + 1)); | ||
host = hostSocket.substring(0, pos); | ||
} | ||
|
||
prop.put("mode_host", "localhost"); | ||
prop.put("mode_port", Integer.toString(port)); | ||
|
||
return prop; | ||
} | ||
prop.put("mode",1); | ||
|
||
// getting the URL | ||
String crawlingStart = post.get("url",null); | ||
|
||
// getting the browser title | ||
String title = post.get("title",null); | ||
|
||
// getting other parameters if set | ||
String crawlingFilter = post.get("crawlingFilter", ".*"); | ||
int CrawlingDepth = Integer.parseInt(post.get("crawlingDepth", "0")); | ||
boolean crawlDynamic = post.get("crawlingQ", "").equals("on"); | ||
boolean storeHTCache = post.get("storeHTCache", "").equals("on"); | ||
boolean localIndexing = post.get("localIndexing", "").equals("on"); | ||
boolean remoteIndexing = post.get("crawlOrder", "").equals("on"); | ||
boolean xsstopw = post.get("xsstopw", "").equals("on"); | ||
boolean xdstopw = post.get("xdstopw", "").equals("on"); | ||
boolean xpstopw = post.get("xpstopw", "").equals("on"); | ||
|
||
prop.put("mode_url",(crawlingStart==null)?"unknown":crawlingStart); | ||
prop.put("mode_title",(title==null)?"unknown":title); | ||
|
||
if (crawlingStart != null) { | ||
crawlingStart = crawlingStart.trim(); | ||
crawlingStart = htmlFilterContentScraper.urlNormalform(null, crawlingStart); | ||
|
||
// check if url is proper | ||
URL crawlingStartURL = null; | ||
try { | ||
crawlingStartURL = new URL(crawlingStart); | ||
} catch (MalformedURLException e) { | ||
prop.put("mode_status", 1); | ||
return prop; | ||
} | ||
|
||
String urlhash = plasmaURL.urlHash(crawlingStart); | ||
switchboard.urlPool.loadedURL.remove(urlhash); | ||
switchboard.urlPool.noticeURL.remove(urlhash); | ||
switchboard.urlPool.errorURL.remove(urlhash); | ||
|
||
// create crawling profile | ||
plasmaCrawlProfile.entry pe = null; | ||
try { | ||
pe = switchboard.profiles.newEntry( | ||
crawlingStartURL.getHost(), | ||
crawlingStart, | ||
crawlingFilter, | ||
crawlingFilter, | ||
CrawlingDepth, | ||
CrawlingDepth, | ||
crawlDynamic, | ||
storeHTCache, | ||
true, | ||
localIndexing, | ||
remoteIndexing, | ||
xsstopw, | ||
xdstopw, | ||
xpstopw | ||
); | ||
} catch (Exception e) { | ||
// mist | ||
prop.put("mode_status", 2);//Error with url | ||
prop.put("mode_status_error", e.getMessage()); | ||
return prop; | ||
} | ||
|
||
// stack URL | ||
String reasonString = switchboard.sbStackCrawlThread.stackCrawl( | ||
crawlingStart, | ||
null, | ||
yacyCore.seedDB.mySeed.hash, | ||
(title==null)?"CRAWLING-ROOT":title, | ||
new Date(), | ||
0, | ||
pe | ||
); | ||
|
||
// validate rejection reason | ||
if (reasonString == null) { | ||
prop.put("mode_status", 0);//start msg | ||
} else { | ||
prop.put("mode_status", 3);//start msg | ||
prop.put("mode_status_error", reasonString); | ||
} | ||
|
||
} | ||
|
||
return prop; | ||
} | ||
|
||
} |
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters