Skip to content

Commit

Permalink
added more / all new crawl profile fields into crawl profile editor
Browse files Browse the repository at this point in the history
  • Loading branch information
Orbiter committed Oct 31, 2012
1 parent 4a14122 commit 0716a24
Show file tree
Hide file tree
Showing 3 changed files with 35 additions and 26 deletions.
4 changes: 2 additions & 2 deletions htroot/CrawlProfileEditor_p.html
Expand Up @@ -110,8 +110,8 @@ <h3>Crawl Profile Editor</h3>
<dd>#(readonly)#
<input id="#[name]#" name="#[name]#"
#(type)# type="checkbox"#(checked)#:: checked="checked"#(/checked)#::
type="text" value="#[value]#"::
type="text" value="#[value]#"#(/type)# />::
type="text" value="#[value]#" size="120" maxlength="10000"::
type="text" value="#[value]#" size="120" maxlength="10000"#(/type)# />::
<strong>#(type)##(checked)#false::true#(/checked)#::#[value]#::#[value]##(/type)#</strong>#(/readonly)#
</dd>#{/entries}#
<dt>&nbsp;</dt><dd><input type="submit" name="submit" value="Submit changes" class="submitready"/></dd>
Expand Down
17 changes: 13 additions & 4 deletions htroot/CrawlProfileEditor_p.java
Expand Up @@ -64,9 +64,17 @@ public eentry(final String name, final String label, final boolean readonly, fin

private static final List <eentry> labels = new ArrayList<eentry>();
static {
labels.add(new eentry(CrawlProfile.NAME, "Name", true, eentry.STRING));
labels.add(new eentry(CrawlProfile.CRAWLER_URL_MUSTMATCH, "Must-Match Filter", false, eentry.STRING));
labels.add(new eentry(CrawlProfile.CRAWLER_URL_MUSTNOTMATCH, "Must-Not-Match Filter", false, eentry.STRING));
labels.add(new eentry(CrawlProfile.NAME, "Name", true, eentry.STRING));
labels.add(new eentry(CrawlProfile.COLLECTIONS, "Collections (comma-separated list)", false, eentry.STRING));
labels.add(new eentry(CrawlProfile.CRAWLER_URL_MUSTMATCH, "URL Must-Match Filter", false, eentry.STRING));
labels.add(new eentry(CrawlProfile.CRAWLER_URL_MUSTNOTMATCH, "URL Must-Not-Match Filter", false, eentry.STRING));
labels.add(new eentry(CrawlProfile.CRAWLER_IP_MUSTMATCH, "IP Must-Match Filter", false, eentry.STRING));
labels.add(new eentry(CrawlProfile.CRAWLER_IP_MUSTNOTMATCH, "IP Must-Not-Match Filter", false, eentry.STRING));
labels.add(new eentry(CrawlProfile.CRAWLER_COUNTRY_MUSTMATCH, "Country Must-Match Filter", false, eentry.STRING));
labels.add(new eentry(CrawlProfile.CRAWLER_URL_NODEPTHLIMITMATCH, "URL No-Depth-Limit Must-Match Filter", false, eentry.STRING));
labels.add(new eentry(CrawlProfile.INDEXING_URL_MUSTMATCH, "Indexing Must-Match Filter", false, eentry.STRING));
labels.add(new eentry(CrawlProfile.INDEXING_URL_MUSTNOTMATCH, "Indexing Must-Not-Match Filter", false, eentry.STRING));
labels.add(new eentry(CrawlProfile.CACHE_STRAGEGY, "Cache Strategy (NOCACHE,IFFRESH,IFEXIST,CACHEONLY)", false, eentry.STRING));
labels.add(new eentry(CrawlProfile.DEPTH, "Crawl Depth", false, eentry.INTEGER));
labels.add(new eentry(CrawlProfile.RECRAWL_IF_OLDER, "Recrawl If Older", false, eentry.INTEGER));
labels.add(new eentry(CrawlProfile.DOM_MAX_PAGES, "Domain Max. Pages", false, eentry.INTEGER));
Expand All @@ -75,8 +83,9 @@ public eentry(final String name, final String label, final boolean readonly, fin
labels.add(new eentry(CrawlProfile.INDEX_MEDIA, "Index Media", false, eentry.BOOLEAN));
labels.add(new eentry(CrawlProfile.STORE_HTCACHE, "Store in HTCache", false, eentry.BOOLEAN));
labels.add(new eentry(CrawlProfile.REMOTE_INDEXING, "Remote Indexing", false, eentry.BOOLEAN));
labels.add(new eentry(CrawlProfile.DIRECT_DOC_BY_URL, "Put all linked urls into index without parsing", false, eentry.BOOLEAN));
}

public static serverObjects respond(
@SuppressWarnings("unused") final RequestHeader header,
final serverObjects post,
Expand Down
40 changes: 20 additions & 20 deletions source/net/yacy/crawler/data/CrawlProfile.java
Expand Up @@ -55,26 +55,26 @@ public class CrawlProfile extends ConcurrentHashMap<String, String> implements M

// this is a simple record structure that hold all properties of a single crawl start
private static final String HANDLE = "handle";
public static final String NAME = "name";
public static final String DEPTH = "generalDepth";
private static final String DIRECT_DOC_BY_URL= "directDocByURL";
public static final String RECRAWL_IF_OLDER = "recrawlIfOlder";
public static final String DOM_MAX_PAGES = "domMaxPages";
public static final String CRAWLING_Q = "crawlingQ";
public static final String INDEX_TEXT = "indexText";
public static final String INDEX_MEDIA = "indexMedia";
public static final String STORE_HTCACHE = "storeHTCache";
public static final String REMOTE_INDEXING = "remoteIndexing";
private static final String CACHE_STRAGEGY = "cacheStrategy";
public static final String CRAWLER_URL_MUSTMATCH = "crawlerURLMustMatch";
public static final String CRAWLER_URL_MUSTNOTMATCH = "crawlerURLMustNotMatch";
private static final String CRAWLER_IP_MUSTMATCH = "crawlerIPMustMatch";
private static final String CRAWLER_IP_MUSTNOTMATCH = "crawlerIPMustNotMatch";
private static final String CRAWLER_COUNTRY_MUSTMATCH = "crawlerCountryMustMatch";
private static final String CRAWLER_URL_NODEPTHLIMITMATCH = "crawlerNoLimitURLMustMatch";
private static final String INDEXING_URL_MUSTMATCH = "indexURLMustMatch";
private static final String INDEXING_URL_MUSTNOTMATCH = "indexURLMustNotMatch";
private static final String COLLECTIONS = "collections";
public static final String NAME = "name";
public static final String DEPTH = "generalDepth";
public static final String DIRECT_DOC_BY_URL= "directDocByURL";
public static final String RECRAWL_IF_OLDER = "recrawlIfOlder";
public static final String DOM_MAX_PAGES = "domMaxPages";
public static final String CRAWLING_Q = "crawlingQ";
public static final String INDEX_TEXT = "indexText";
public static final String INDEX_MEDIA = "indexMedia";
public static final String STORE_HTCACHE = "storeHTCache";
public static final String REMOTE_INDEXING = "remoteIndexing";
public static final String CACHE_STRAGEGY = "cacheStrategy";
public static final String COLLECTIONS = "collections";
public static final String CRAWLER_URL_MUSTMATCH = "crawlerURLMustMatch";
public static final String CRAWLER_URL_MUSTNOTMATCH = "crawlerURLMustNotMatch";
public static final String CRAWLER_IP_MUSTMATCH = "crawlerIPMustMatch";
public static final String CRAWLER_IP_MUSTNOTMATCH = "crawlerIPMustNotMatch";
public static final String CRAWLER_COUNTRY_MUSTMATCH = "crawlerCountryMustMatch";
public static final String CRAWLER_URL_NODEPTHLIMITMATCH = "crawlerNoLimitURLMustMatch";
public static final String INDEXING_URL_MUSTMATCH = "indexURLMustMatch";
public static final String INDEXING_URL_MUSTNOTMATCH = "indexURLMustNotMatch";

private Pattern crawlerurlmustmatch = null, crawlerurlmustnotmatch = null;
private Pattern crawleripmustmatch = null, crawleripmustnotmatch = null;
Expand Down

0 comments on commit 0716a24

Please sign in to comment.