Added more (all new) crawl profile fields to the crawl profile editor

yacy · Oct 31, 2012 · 0716a24 · 0716a24
1 parent 4a14122
commit 0716a24
Show file tree

Hide file tree

Showing 3 changed files with 35 additions and 26 deletions.
diff --git a/htroot/CrawlProfileEditor_p.html b/htroot/CrawlProfileEditor_p.html
@@ -110,8 +110,8 @@ <h3>Crawl Profile Editor</h3>
           <dd>#(readonly)# 
             <input id="#[name]#" name="#[name]#"
                    #(type)# type="checkbox"#(checked)#:: checked="checked"#(/checked)#::
-                    type="text" value="#[value]#"::
-                    type="text" value="#[value]#"#(/type)# />::
+                    type="text" value="#[value]#" size="120" maxlength="10000"::
+                    type="text" value="#[value]#" size="120" maxlength="10000"#(/type)# />::
             <strong>#(type)##(checked)#false::true#(/checked)#::#[value]#::#[value]##(/type)#</strong>#(/readonly)# 
           </dd>#{/entries}# 
         <dt>&nbsp;</dt><dd><input type="submit" name="submit" value="Submit changes" class="submitready"/></dd>

diff --git a/htroot/CrawlProfileEditor_p.java b/htroot/CrawlProfileEditor_p.java
@@ -64,9 +64,17 @@ public eentry(final String name, final String label, final boolean readonly, fin
 
     private static final List <eentry> labels = new ArrayList<eentry>();
     static {
-        labels.add(new eentry(CrawlProfile.NAME,                "Name",                  true,  eentry.STRING));
-        labels.add(new eentry(CrawlProfile.CRAWLER_URL_MUSTMATCH,    "Must-Match Filter",     false, eentry.STRING));
-        labels.add(new eentry(CrawlProfile.CRAWLER_URL_MUSTNOTMATCH, "Must-Not-Match Filter", false, eentry.STRING));
+        labels.add(new eentry(CrawlProfile.NAME,                          "Name",                                 true,  eentry.STRING));
+        labels.add(new eentry(CrawlProfile.COLLECTIONS,                   "Collections (comma-separated list)",   false, eentry.STRING));
+        labels.add(new eentry(CrawlProfile.CRAWLER_URL_MUSTMATCH,         "URL Must-Match Filter",                false, eentry.STRING));
+        labels.add(new eentry(CrawlProfile.CRAWLER_URL_MUSTNOTMATCH,      "URL Must-Not-Match Filter",            false, eentry.STRING));
+        labels.add(new eentry(CrawlProfile.CRAWLER_IP_MUSTMATCH,          "IP Must-Match Filter",                 false, eentry.STRING));
+        labels.add(new eentry(CrawlProfile.CRAWLER_IP_MUSTNOTMATCH,       "IP Must-Not-Match Filter",             false, eentry.STRING));
+        labels.add(new eentry(CrawlProfile.CRAWLER_COUNTRY_MUSTMATCH,     "Country Must-Match Filter",            false, eentry.STRING));
+        labels.add(new eentry(CrawlProfile.CRAWLER_URL_NODEPTHLIMITMATCH, "URL No-Depth-Limit Must-Match Filter", false, eentry.STRING));
+        labels.add(new eentry(CrawlProfile.INDEXING_URL_MUSTMATCH,        "Indexing Must-Match Filter",           false, eentry.STRING));
+        labels.add(new eentry(CrawlProfile.INDEXING_URL_MUSTNOTMATCH,     "Indexing Must-Not-Match Filter",       false, eentry.STRING));
+        labels.add(new eentry(CrawlProfile.CACHE_STRAGEGY,  "Cache Strategy (NOCACHE,IFFRESH,IFEXIST,CACHEONLY)", false, eentry.STRING));
         labels.add(new eentry(CrawlProfile.DEPTH,               "Crawl Depth",           false, eentry.INTEGER));
         labels.add(new eentry(CrawlProfile.RECRAWL_IF_OLDER,    "Recrawl If Older",      false, eentry.INTEGER));
         labels.add(new eentry(CrawlProfile.DOM_MAX_PAGES,       "Domain Max. Pages",     false, eentry.INTEGER));
@@ -75,8 +83,9 @@ public eentry(final String name, final String label, final boolean readonly, fin
         labels.add(new eentry(CrawlProfile.INDEX_MEDIA,         "Index Media",           false, eentry.BOOLEAN));
         labels.add(new eentry(CrawlProfile.STORE_HTCACHE,       "Store in HTCache",      false, eentry.BOOLEAN));
         labels.add(new eentry(CrawlProfile.REMOTE_INDEXING,     "Remote Indexing",       false, eentry.BOOLEAN));
+        labels.add(new eentry(CrawlProfile.DIRECT_DOC_BY_URL,   "Put all linked urls into index without parsing", false, eentry.BOOLEAN));
     }
-
+    
     public static serverObjects respond(
             @SuppressWarnings("unused") final RequestHeader header,
             final serverObjects post,

diff --git a/source/net/yacy/crawler/data/CrawlProfile.java b/source/net/yacy/crawler/data/CrawlProfile.java
@@ -55,26 +55,26 @@ public class CrawlProfile extends ConcurrentHashMap<String, String> implements M
 
     // this is a simple record structure that hold all properties of a single crawl start
     private static final String HANDLE           = "handle";
-    public  static final String NAME             = "name";
-    public  static final String DEPTH            = "generalDepth";
-    private static final String DIRECT_DOC_BY_URL= "directDocByURL";
-    public  static final String RECRAWL_IF_OLDER = "recrawlIfOlder";
-    public  static final String DOM_MAX_PAGES    = "domMaxPages";
-    public  static final String CRAWLING_Q       = "crawlingQ";
-    public  static final String INDEX_TEXT       = "indexText";
-    public  static final String INDEX_MEDIA      = "indexMedia";
-    public  static final String STORE_HTCACHE    = "storeHTCache";
-    public  static final String REMOTE_INDEXING  = "remoteIndexing";
-    private static final String CACHE_STRAGEGY   = "cacheStrategy";
-    public  static final String CRAWLER_URL_MUSTMATCH         = "crawlerURLMustMatch";
-    public  static final String CRAWLER_URL_MUSTNOTMATCH      = "crawlerURLMustNotMatch";
-    private static final String CRAWLER_IP_MUSTMATCH          = "crawlerIPMustMatch";
-    private static final String CRAWLER_IP_MUSTNOTMATCH       = "crawlerIPMustNotMatch";
-    private static final String CRAWLER_COUNTRY_MUSTMATCH     = "crawlerCountryMustMatch";
-    private static final String CRAWLER_URL_NODEPTHLIMITMATCH = "crawlerNoLimitURLMustMatch";
-    private static final String INDEXING_URL_MUSTMATCH        = "indexURLMustMatch";
-    private static final String INDEXING_URL_MUSTNOTMATCH     = "indexURLMustNotMatch";
-    private static final String COLLECTIONS = "collections";
+    public static final String NAME             = "name";
+    public static final String DEPTH            = "generalDepth";
+    public static final String DIRECT_DOC_BY_URL= "directDocByURL";
+    public static final String RECRAWL_IF_OLDER = "recrawlIfOlder";
+    public static final String DOM_MAX_PAGES    = "domMaxPages";
+    public static final String CRAWLING_Q       = "crawlingQ";
+    public static final String INDEX_TEXT       = "indexText";
+    public static final String INDEX_MEDIA      = "indexMedia";
+    public static final String STORE_HTCACHE    = "storeHTCache";
+    public static final String REMOTE_INDEXING  = "remoteIndexing";
+    public static final String CACHE_STRAGEGY   = "cacheStrategy";
+    public static final String COLLECTIONS      = "collections";
+    public static final String CRAWLER_URL_MUSTMATCH         = "crawlerURLMustMatch";
+    public static final String CRAWLER_URL_MUSTNOTMATCH      = "crawlerURLMustNotMatch";
+    public static final String CRAWLER_IP_MUSTMATCH          = "crawlerIPMustMatch";
+    public static final String CRAWLER_IP_MUSTNOTMATCH       = "crawlerIPMustNotMatch";
+    public static final String CRAWLER_COUNTRY_MUSTMATCH     = "crawlerCountryMustMatch";
+    public static final String CRAWLER_URL_NODEPTHLIMITMATCH = "crawlerNoLimitURLMustMatch";
+    public static final String INDEXING_URL_MUSTMATCH        = "indexURLMustMatch";
+    public static final String INDEXING_URL_MUSTNOTMATCH     = "indexURLMustNotMatch";
 
     private Pattern crawlerurlmustmatch = null, crawlerurlmustnotmatch = null;
     private Pattern crawleripmustmatch = null, crawleripmustnotmatch = null;