diff --git a/htroot/ConfigHeuristics_p.java b/htroot/ConfigHeuristics_p.java index 669399a5a7..bdcc3a0b04 100644 --- a/htroot/ConfigHeuristics_p.java +++ b/htroot/ConfigHeuristics_p.java @@ -72,7 +72,7 @@ public static serverObjects respond(@SuppressWarnings("unused") final RequestHea } if (post.containsKey("opensearch_off")) sb.setConfig("heuristic.opensearch", false); if (post.containsKey("discoverosd")) { - final boolean metafieldNOTavailable = sb.index.fulltext().getSolrScheme().containsDisabled(YaCySchema.outboundlinks_tag_txt.name()); + final boolean metafieldNOTavailable = sb.index.fulltext().getSolrSchema().containsDisabled(YaCySchema.outboundlinks_tag_txt.name()); if (!metafieldNOTavailable) { OpenSearchConnector osc = new OpenSearchConnector(sb, false); if (osc.discoverFromSolrIndex(sb)) { @@ -102,24 +102,24 @@ public static serverObjects respond(@SuppressWarnings("unused") final RequestHea } if (post.containsKey("setopensearch")) { - // read index scheme table flags + // read index schema table flags writeopensearchcfg (sb,post); } if (post.containsKey("switchsolrfieldson")) { - final boolean metafieldNOTavailable = sb.index.fulltext().getSolrScheme().containsDisabled(YaCySchema.outboundlinks_tag_txt.name()); + final boolean metafieldNOTavailable = sb.index.fulltext().getSolrSchema().containsDisabled(YaCySchema.outboundlinks_tag_txt.name()); if (metafieldNOTavailable) { ConfigurationSet.Entry entry; - entry = sb.index.fulltext().getSolrScheme().get(YaCySchema.outboundlinks_tag_txt.name()); + entry = sb.index.fulltext().getSolrSchema().get(YaCySchema.outboundlinks_tag_txt.name()); if (entry != null && !entry.enabled()) { entry.setEnable(true); } - entry = sb.index.fulltext().getSolrScheme().get(YaCySchema.inboundlinks_tag_txt.name()); + entry = sb.index.fulltext().getSolrSchema().get(YaCySchema.inboundlinks_tag_txt.name()); if (entry != null && !entry.enabled()) { entry.setEnable(true); } try { - sb.index.fulltext().getSolrScheme().commit(); + 
sb.index.fulltext().getSolrSchema().commit(); } catch (IOException ex) {} } } @@ -139,7 +139,7 @@ public static serverObjects respond(@SuppressWarnings("unused") final RequestHea } } - final boolean showmetafieldbutton = sb.index.fulltext().getSolrScheme().containsDisabled(YaCySchema.outboundlinks_tag_txt.name()); + final boolean showmetafieldbutton = sb.index.fulltext().getSolrSchema().containsDisabled(YaCySchema.outboundlinks_tag_txt.name()); if (showmetafieldbutton) prop.put("osdsolrfieldswitch",1); prop.put("site.checked", sb.getConfigBool("heuristic.site", false) ? 1 : 0); prop.put("searchresult.checked", sb.getConfigBool("heuristic.searchresults", false) ? 1 : 0); @@ -175,7 +175,7 @@ public static serverObjects respond(@SuppressWarnings("unused") final RequestHea } private static void writeopensearchcfg(final Switchboard sb, final serverObjects post) { - // read index scheme table flags + // read index schema table flags final File f = new File(sb.getDataPath(), "DATA/SETTINGS/heuristicopensearch.conf"); ConfigurationSet cfg = new ConfigurationSet(f); diff --git a/htroot/CrawlResults.java b/htroot/CrawlResults.java index 51020747f1..5cc8b25f16 100644 --- a/htroot/CrawlResults.java +++ b/htroot/CrawlResults.java @@ -55,7 +55,7 @@ public static serverObjects respond(final RequestHeader header, serverObjects po final serverObjects prop = new serverObjects(); int lines = 500; - boolean showCollection = sb.index.fulltext().getSolrScheme().isEmpty() || sb.index.fulltext().getSolrScheme().contains(YaCySchema.collection_sxt); + boolean showCollection = sb.index.fulltext().getSolrSchema().isEmpty() || sb.index.fulltext().getSolrSchema().contains(YaCySchema.collection_sxt); boolean showInit = env.getConfigBool("IndexMonitorInit", false); boolean showExec = env.getConfigBool("IndexMonitorExec", false); boolean showDate = env.getConfigBool("IndexMonitorDate", true); diff --git a/htroot/CrawlStartExpert_p.java b/htroot/CrawlStartExpert_p.java index 6b8b532e45..0b3bd70a5a 
100644 --- a/htroot/CrawlStartExpert_p.java +++ b/htroot/CrawlStartExpert_p.java @@ -76,7 +76,7 @@ public static serverObjects respond(@SuppressWarnings("unused") final RequestHea prop.put("xdstopwChecked", env.getConfigBool("xdstopw", true) ? "1" : "0"); prop.put("xpstopwChecked", env.getConfigBool("xpstopw", true) ? "1" : "0"); - boolean collectionEnabled = sb.index.fulltext().getSolrScheme().isEmpty() || sb.index.fulltext().getSolrScheme().contains(YaCySchema.collection_sxt); + boolean collectionEnabled = sb.index.fulltext().getSolrSchema().isEmpty() || sb.index.fulltext().getSolrSchema().contains(YaCySchema.collection_sxt); prop.put("collectionEnabled", collectionEnabled ? 1 : 0); prop.put("collection", collectionEnabled ? "user" : ""); diff --git a/htroot/IndexFederated_p.html b/htroot/IndexFederated_p.html index 7f6925f69e..3baaea7006 100644 --- a/htroot/IndexFederated_p.html +++ b/htroot/IndexFederated_p.html @@ -1,7 +1,7 @@ - YaCy '#[clientname]#': Federated Index + YaCy '#[clientname]#': Remote Solr Configuration #%env/templates/metas.template%# @@ -12,7 +12,7 @@ #%env/templates/header.template%# #%env/templates/submenuIndexControl.template%# -

Federated Index

+

Remote Solr Configuration

YaCy supports multiple index storage locations. At this time only the YaCy-internal search index can be used for the YaCy search interface A Solr index storage location is optional. The local index storage location can be disabled. @@ -64,38 +64,12 @@

Federated Index

Solr URL(s)

You can set one or more Solr targets here which are accessed as a shard. For several targets, list them using a ',' (comma) as separator.
-
Commit-Within (milliseconds)
-
(increase this value to i.e. 180000 - 3 minutes - for more performance)
-
Lazy Value Initialization
-
(if checked, only non-zero values and non-empty strings are written)
Sharding Method
-
Scheme
-
-
-

Index Scheme

If you use a custom Solr schema you may enter a different field name in the column 'Custom Solr Field Name' of the YaCy default attribute name

- - - - - - - - #{scheme}# - - - - - - - #{/scheme}# -
ActiveAttributeCustom Solr Field NameComment
#[key]##[comment]#
-
- - + #%env/templates/footer.template%# diff --git a/htroot/IndexFederated_p.java b/htroot/IndexFederated_p.java index 6af0118b6f..efd8d5332f 100644 --- a/htroot/IndexFederated_p.java +++ b/htroot/IndexFederated_p.java @@ -3,10 +3,6 @@ * Copyright 2011 by Michael Peter Christen, mc@yacy.net, Frankfurt am Main, Germany * First released 25.05.2011 at http://yacy.net * - * $LastChangedDate: 2011-04-14 00:04:23 +0200 (Do, 14 Apr 2011) $ - * $LastChangedRevision: 7653 $ - * $LastChangedBy: orbiter $ - * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either @@ -26,17 +22,16 @@ import java.io.ByteArrayInputStream; import java.io.IOException; import java.io.InputStreamReader; -import java.util.Iterator; +import java.util.ArrayList; import org.apache.solr.common.SolrException; import net.yacy.cora.document.UTF8; -import net.yacy.cora.federate.solr.YaCySchema; import net.yacy.cora.federate.solr.connector.RemoteSolrConnector; import net.yacy.cora.federate.solr.connector.ShardSelection; import net.yacy.cora.federate.solr.connector.ShardSolrConnector; import net.yacy.cora.federate.solr.connector.SolrConnector; -import net.yacy.cora.federate.yacy.ConfigurationSet; +import net.yacy.cora.federate.solr.instance.SolrRemoteInstance; import net.yacy.cora.protocol.RequestHeader; import net.yacy.kelondro.logging.Log; import net.yacy.kelondro.util.OS; @@ -95,7 +90,6 @@ public static serverObjects respond(@SuppressWarnings("unused") final RequestHea String solrurls = post.get("solr.indexing.url", env.getConfig(SwitchboardConstants.FEDERATED_SERVICE_SOLR_INDEXING_URL, "http://127.0.0.1:8983/solr")); final boolean solrRemoteIsOnAfterwards = post.getBoolean("solr.indexing.solrremote") & solrurls.length() > 0; env.setConfig(SwitchboardConstants.FEDERATED_SERVICE_SOLR_INDEXING_ENABLED, solrRemoteIsOnAfterwards); - boolean lazy = 
post.getBoolean("solr.indexing.lazy"); final BufferedReader r = new BufferedReader(new InputStreamReader(new ByteArrayInputStream(UTF8.getBytes(solrurls)))); final StringBuilder s = new StringBuilder(); String s0; @@ -113,11 +107,8 @@ public static serverObjects respond(@SuppressWarnings("unused") final RequestHea } solrurls = s.toString().trim(); env.setConfig(SwitchboardConstants.FEDERATED_SERVICE_SOLR_INDEXING_URL, solrurls); - env.setConfig(SwitchboardConstants.FEDERATED_SERVICE_SOLR_INDEXING_LAZY, lazy); env.setConfig(SwitchboardConstants.FEDERATED_SERVICE_SOLR_INDEXING_SHARDING, post.get("solr.indexing.sharding", env.getConfig(SwitchboardConstants.FEDERATED_SERVICE_SOLR_INDEXING_SHARDING, "modulo-host-md5"))); - final String schemename = post.get("solr.indexing.schemefile", env.getConfig(SwitchboardConstants.FEDERATED_SERVICE_SOLR_INDEXING_SCHEMEFILE, "solr.keys.default.list")); - env.setConfig(SwitchboardConstants.FEDERATED_SERVICE_SOLR_INDEXING_SCHEMEFILE, schemename); - + if (solrRemoteWasOn && !solrRemoteIsOnAfterwards) { // switch off try { @@ -133,7 +124,8 @@ public static serverObjects respond(@SuppressWarnings("unused") final RequestHea final boolean usesolr = sb.getConfigBool(SwitchboardConstants.FEDERATED_SERVICE_SOLR_INDEXING_ENABLED, false) & solrurls.length() > 0; try { if (usesolr) { - SolrConnector solr = new ShardSolrConnector(solrurls, ShardSelection.Method.MODULO_HOST_MD5, 10000, true); + ArrayList instances = ShardSolrConnector.getShardInstances(solrurls); + ShardSolrConnector solr = new ShardSolrConnector(instances, ShardSelection.Method.MODULO_HOST_MD5, true); sb.index.fulltext().connectRemoteSolr(solr); } else { sb.index.fulltext().disconnectRemoteSolr(); @@ -149,35 +141,6 @@ public static serverObjects respond(@SuppressWarnings("unused") final RequestHea } catch (SolrException e) { Log.logSevere("IndexFederated_p", "change of solr connection failed", e); } - - // read index scheme table flags - final Iterator i = 
sb.index.fulltext().getSolrScheme().entryIterator(); - ConfigurationSet.Entry entry; - boolean modified = false; // flag to remember changes - while (i.hasNext()) { - entry = i.next(); - final String v = post.get("scheme_" + entry.key()); - final String sfn = post.get("scheme_solrfieldname_" + entry.key()); - if (sfn != null ) { - // set custom solr field name - if (!sfn.equals(entry.getValue())) { - entry.setValue(sfn); - modified = true; - } - } - // set enable flag - final boolean c = v != null && v.equals("checked"); - if (entry.enabled() != c) { - entry.setEnable(c); - modified = true; - } - } - if (modified) { // save settings to config file if modified - try { - sb.index.fulltext().getSolrScheme().commit(); - modified = false; - } catch (IOException ex) {} - } } // show solr host table @@ -187,7 +150,7 @@ public static serverObjects respond(@SuppressWarnings("unused") final RequestHea prop.put("table", 1); final SolrConnector solr = sb.index.fulltext().getRemoteSolr(); final long[] size = (solr instanceof ShardSolrConnector) ? ((ShardSolrConnector) solr).getSizeList() : new long[]{((RemoteSolrConnector) solr).getSize()}; - final String[] urls = (solr instanceof ShardSolrConnector) ? ((ShardSolrConnector) solr).getAdminInterfaceList() : new String[]{((RemoteSolrConnector) solr).getAdminInterface()}; + final String[] urls = (solr instanceof ShardSolrConnector) ? ((ShardSolrConnector) solr).getAdminInterfaceList() : new String[]{((SolrRemoteInstance) ((RemoteSolrConnector) solr).getInstance()).getAdminInterface()}; boolean dark = false; for (int i = 0; i < size.length; i++) { prop.put("table_list_" + i + "_dark", dark ? 
1 : 0); dark = !dark; @@ -197,34 +160,12 @@ public static serverObjects respond(@SuppressWarnings("unused") final RequestHea prop.put("table_list", size.length); } - // write scheme - final String schemename = sb.getConfig(SwitchboardConstants.FEDERATED_SERVICE_SOLR_INDEXING_SCHEMEFILE, "solr.keys.default.list"); - - int c = 0; - boolean dark = false; - // use enum SolrField to keep defined order - for(YaCySchema field : YaCySchema.values()) { - prop.put("scheme_" + c + "_dark", dark ? 1 : 0); dark = !dark; - prop.put("scheme_" + c + "_checked", sb.index.fulltext().getSolrScheme().contains(field.name()) ? 1 : 0); - prop.putHTML("scheme_" + c + "_key", field.name()); - prop.putHTML("scheme_" + c + "_solrfieldname",field.name().equalsIgnoreCase(field.getSolrFieldName()) ? "" : field.getSolrFieldName()); - if (field.getComment() != null) prop.putHTML("scheme_" + c + "_comment",field.getComment()); - c++; - } - prop.put("scheme", c); - - // fill attribute fields - // allowed values are: classic, solr, off - // federated.service.yacy.indexing.engine = classic - prop.put(SwitchboardConstants.CORE_SERVICE_FULLTEXT + ".checked", env.getConfigBool(SwitchboardConstants.CORE_SERVICE_FULLTEXT, false) ? 1 : 0); prop.put(SwitchboardConstants.CORE_SERVICE_RWI + ".checked", env.getConfigBool(SwitchboardConstants.CORE_SERVICE_RWI, false) ? 1 : 0); prop.put(SwitchboardConstants.CORE_SERVICE_CITATION + ".checked", env.getConfigBool(SwitchboardConstants.CORE_SERVICE_CITATION, false) ? 1 : 0); prop.put("solr.indexing.solrremote.checked", env.getConfigBool(SwitchboardConstants.FEDERATED_SERVICE_SOLR_INDEXING_ENABLED, false) ? 1 : 0); prop.put("solr.indexing.url", env.getConfig(SwitchboardConstants.FEDERATED_SERVICE_SOLR_INDEXING_URL, "http://127.0.0.1:8983/solr").replace(",", "\n")); - prop.put("solr.indexing.lazy.checked", env.getConfigBool(SwitchboardConstants.FEDERATED_SERVICE_SOLR_INDEXING_LAZY, true) ? 
1 : 0); prop.put("solr.indexing.sharding", env.getConfig(SwitchboardConstants.FEDERATED_SERVICE_SOLR_INDEXING_SHARDING, "modulo-host-md5")); - prop.put("solr.indexing.schemefile", schemename); if ((sb.index.fulltext().connectedURLDb())) { prop.put("migrateUrlDbtoSolr", 1); diff --git a/htroot/IndexSchema_p.html b/htroot/IndexSchema_p.html new file mode 100644 index 0000000000..2b063a0738 --- /dev/null +++ b/htroot/IndexSchema_p.html @@ -0,0 +1,48 @@ + + + + YaCy '#[clientname]#': Solr Schema Editor + #%env/templates/metas.template%# + + +
+ +API +The Solr schema can also be retrieved as XML here. Click the API icon to see the XML. Just copy this XML to solr/conf/schema.xml to configure Solr. +
+ #%env/templates/header.template%# + #%env/templates/submenuIndexControl.template%# +

Solr Schema Editor

+

If you use a custom Solr schema, you may enter a different field name in the column 'Custom Solr Field Name' of the YaCy default attribute name.

+ +
+
+
+ + + + + + + + #{schema}# + + + + + + + #{/schema}# +
ActiveAttributeCustom Solr Field NameComment
#[key]##[comment]#
+
+
Lazy Value Initialization
+
(if checked, only non-zero values and non-empty strings are written)
+
+
+
+ +
+ + #%env/templates/footer.template%# + + diff --git a/htroot/IndexSchema_p.java b/htroot/IndexSchema_p.java new file mode 100644 index 0000000000..f47432d930 --- /dev/null +++ b/htroot/IndexSchema_p.java @@ -0,0 +1,92 @@ +/** + * IndexSchemaFulltext_p + * Copyright 2011 by Michael Peter Christen, mc@yacy.net, Frankfurt am Main, Germany + * First released 13.02.2013 at http://yacy.net + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program in the file lgpl21.txt + * If not, see . 
+ */ + +import java.io.IOException; +import java.util.Iterator; + +import net.yacy.cora.federate.solr.YaCySchema; +import net.yacy.cora.federate.yacy.ConfigurationSet; +import net.yacy.cora.protocol.RequestHeader; +import net.yacy.search.Switchboard; +import net.yacy.search.SwitchboardConstants; +import net.yacy.server.serverObjects; +import net.yacy.server.serverSwitch; + +public class IndexSchema_p { + + public static serverObjects respond(@SuppressWarnings("unused") final RequestHeader header, final serverObjects post, final serverSwitch env) { + // return variable that accumulates replacements + final serverObjects prop = new serverObjects(); + final Switchboard sb = (Switchboard) env; + + if (post != null && post.containsKey("set")) { + // read index schema table flags + final Iterator i = sb.index.fulltext().getSolrSchema().entryIterator(); + ConfigurationSet.Entry entry; + boolean modified = false; // flag to remember changes + while (i.hasNext()) { + entry = i.next(); + final String v = post.get("schema_" + entry.key()); + final String sfn = post.get("schema_solrfieldname_" + entry.key()); + if (sfn != null ) { + // set custom solr field name + if (!sfn.equals(entry.getValue())) { + entry.setValue(sfn); + modified = true; + } + } + // set enable flag + final boolean c = v != null && v.equals("checked"); + if (entry.enabled() != c) { + entry.setEnable(c); + modified = true; + } + } + if (modified) { // save settings to config file if modified + try { + sb.index.fulltext().getSolrSchema().commit(); + modified = false; + } catch (IOException ex) {} + } + + boolean lazy = post.getBoolean("lazy"); + env.setConfig(SwitchboardConstants.FEDERATED_SERVICE_SOLR_INDEXING_LAZY, lazy); + + } + + int c = 0; + boolean dark = false; + // use enum SolrField to keep defined order + for(YaCySchema field : YaCySchema.values()) { + prop.put("schema_" + c + "_dark", dark ? 
1 : 0); dark = !dark; + prop.put("schema_" + c + "_checked", sb.index.fulltext().getSolrSchema().contains(field.name()) ? 1 : 0); + prop.putHTML("schema_" + c + "_key", field.name()); + prop.putHTML("schema_" + c + "_solrfieldname",field.name().equalsIgnoreCase(field.getSolrFieldName()) ? "" : field.getSolrFieldName()); + if (field.getComment() != null) prop.putHTML("schema_" + c + "_comment",field.getComment()); + c++; + } + prop.put("schema", c); + + prop.put("lazy.checked", env.getConfigBool(SwitchboardConstants.FEDERATED_SERVICE_SOLR_INDEXING_LAZY, true) ? 1 : 0); + + // return rewrite properties + return prop; + } +} diff --git a/htroot/api/schema.java b/htroot/api/schema.java index 670450ff17..74c774500e 100644 --- a/htroot/api/schema.java +++ b/htroot/api/schema.java @@ -39,11 +39,11 @@ public static serverObjects respond(@SuppressWarnings("unused") final RequestHea final servletProperties prop = new servletProperties(); final Switchboard sb = (Switchboard) env; - // write scheme + // write schema int c = 0; - SolrConfiguration solrScheme = sb.index.fulltext().getSolrScheme(); + SolrConfiguration solrSchema = sb.index.fulltext().getSolrSchema(); for (YaCySchema field : YaCySchema.values()) { - if (solrScheme.contains(field.name())) { + if (solrSchema.contains(field.name())) { addField(prop, c, field); c++; } @@ -51,13 +51,13 @@ public static serverObjects respond(@SuppressWarnings("unused") final RequestHea //if (solrScheme.contains(YaCySchema.author)) {addField(prop, c, YaCySchema.author_sxt);} prop.put("fields", c); - prop.put("copyFieldAuthor", solrScheme.contains(YaCySchema.author) ? 1 : 0); + prop.put("copyFieldAuthor", solrSchema.contains(YaCySchema.author) ? 1 : 0); prop.put("solruniquekey",YaCySchema.id.getSolrFieldName()); prop.put("solrdefaultsearchfield", - solrScheme.contains(YaCySchema.text_t) ? YaCySchema.text_t.getSolrFieldName() : - solrScheme.contains(YaCySchema.fuzzy_signature_text_t) ? 
YaCySchema.fuzzy_signature_text_t.getSolrFieldName() : - solrScheme.contains(YaCySchema.h1_txt) ? YaCySchema.h1_txt.getSolrFieldName() : + solrSchema.contains(YaCySchema.text_t) ? YaCySchema.text_t.getSolrFieldName() : + solrSchema.contains(YaCySchema.fuzzy_signature_text_t) ? YaCySchema.fuzzy_signature_text_t.getSolrFieldName() : + solrSchema.contains(YaCySchema.h1_txt) ? YaCySchema.h1_txt.getSolrFieldName() : YaCySchema.id.getSolrFieldName() ); diff --git a/htroot/env/templates/submenuIndexControl.template b/htroot/env/templates/submenuIndexControl.template index abb791a44c..e0d46e7552 100644 --- a/htroot/env/templates/submenuIndexControl.template +++ b/htroot/env/templates/submenuIndexControl.template @@ -2,7 +2,8 @@

Index Administration