Permalink
Browse files

fight the fieldcache by using DocValues: in Solr-5.x the fieldcache has

moved and is not cleared anymore. This results in a huge fieldcache.
(http://lucene.apache.org/#highlights-of-the-lucene-release-include
https://issues.apache.org/jira/browse/LUCENE-5666)
Here I try to use DocValues wherever possible.
For this I used the Api-Scheme as the new basis for the Solr-Schema.
This needs at least a complete optimization of the Solr-Index to get a
smaller FieldCache.
Everything that is indexed with these settings will not use the
Fieldcache at all.
  • Loading branch information...
sixcooler committed Aug 31, 2015
1 parent c729d08 commit 87e4abe393d70165e3d7dfaec5be7365ff2ac965

Large diffs are not rendered by default.

Oops, something went wrong.
@@ -106,5 +106,6 @@ private static void addField(servletProperties prop, int c, SchemaDeclaration fi
prop.put("fields_" + c + "_storedChecked", field.isStored() ? 1 : 0);
prop.put("fields_" + c + "_multiValuedChecked", field.isMultiValued() ? 1 : 0);
prop.put("fields_" + c + "_omitNormsChecked", field.isOmitNorms() ? 1 : 0);
prop.put("fields_" + c + "_docValueChecked", field.isDocValue() ? 1 : 0);
}
}
@@ -1,18 +1,18 @@
<?xml version="1.0" encoding="UTF-8" ?>
<schema name="YaCy2Solr" version="1.3">
<schema name="YaCy2Solr" version="1.4">
<types>
<fieldType name="string" class="solr.StrField" omitNorms="true" sortMissingLast="true" />
<fieldType name="string" class="solr.StrField" omitNorms="true" sortMissingLast="true" docValues="true" />
<fieldType name="boolean" class="solr.BoolField" omitNorms="true" sortMissingLast="true" />
<fieldType name="int" class="solr.TrieIntField" omitNorms="true" precisionStep="0" positionIncrementGap="0" />
<fieldType name="tint" class="solr.TrieIntField" precisionStep="8" positionIncrementGap="0"/>
<fieldType name="long" class="solr.TrieLongField" precisionStep="0" positionIncrementGap="0"/>
<fieldType name="tlong" class="solr.TrieLongField" precisionStep="8" positionIncrementGap="0"/>
<fieldType name="double" class="solr.TrieDoubleField" omitNorms="true" precisionStep="0" positionIncrementGap="0" />
<fieldType name="tdouble" class="solr.TrieDoubleField" omitNorms="true" precisionStep="8" positionIncrementGap="0" />
<fieldType name="date" class="solr.TrieDateField" omitNorms="true" precisionStep="0" positionIncrementGap="0" />
<fieldType name="float" class="solr.TrieFloatField" precisionStep="0" positionIncrementGap="0"/>
<fieldType name="tfloat" class="solr.TrieFloatField" precisionStep="8" positionIncrementGap="0"/>
<fieldType name="int" class="solr.TrieIntField" omitNorms="true" precisionStep="0" positionIncrementGap="0" docValues="true" />
<fieldType name="tint" class="solr.TrieIntField" precisionStep="8" positionIncrementGap="0" docValues="true" />
<fieldType name="long" class="solr.TrieLongField" precisionStep="0" positionIncrementGap="0" docValues="true" />
<fieldType name="tlong" class="solr.TrieLongField" precisionStep="8" positionIncrementGap="0" docValues="true" />
<fieldType name="double" class="solr.TrieDoubleField" omitNorms="true" precisionStep="0" positionIncrementGap="0" docValues="true" />
<fieldType name="tdouble" class="solr.TrieDoubleField" omitNorms="true" precisionStep="8" positionIncrementGap="0" docValues="true" />
<fieldType name="date" class="solr.TrieDateField" omitNorms="true" precisionStep="0" positionIncrementGap="0" docValues="true" />
<fieldType name="float" class="solr.TrieFloatField" precisionStep="0" positionIncrementGap="0" docValues="true" />
<fieldType name="tfloat" class="solr.TrieFloatField" precisionStep="8" positionIncrementGap="0" docValues="true" />
<fieldType name="location" class="solr.LatLonType" subFieldSuffix="_coordinate"/>
<fieldType name="text" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true">
@@ -30,12 +30,12 @@
<fieldType name="text_general" class="solr.TextField" positionIncrementGap="100">
<analyzer type="index">
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
<analyzer type="query">
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
@@ -55,12 +55,12 @@
</types>
<fields>
<field name="_version_" type="long" indexed="true" stored="true"/> <!-- a mandatory solr field used for operation in SolrCloud -->
<field name="_version_" type="long" indexed="true" stored="true" docValues="true" /> <!-- a mandatory solr field used for operation in SolrCloud -->
<field name="text" type="text_general" indexed="true" stored="false" multiValued="true"/> <!-- this is default in solrconfix.xml; not used but you get errors when not defining it -->
#{fields}#
<!-- #[comment]# -->
<field name="#[solrname]#" type="#[type]#"#(indexedChecked)#:: indexed="true"#(/indexedChecked)##(storedChecked)#:: stored="true"#(/storedChecked)##(multiValuedChecked)#:: multiValued="true"#(/multiValuedChecked)##(omitNormsChecked)#:: omitNorms="true"#(/omitNormsChecked)#/>
<field name="#[solrname]#" type="#[type]#"#(indexedChecked)#:: indexed="true"#(/indexedChecked)##(storedChecked)#:: stored="true"#(/storedChecked)##(multiValuedChecked)#:: multiValued="true"#(/multiValuedChecked)##(omitNormsChecked)#:: omitNorms="true"#(/omitNormsChecked)##(docValueChecked)#:: docValues="true"#(/docValueChecked)#/>
#{/fields}#
@@ -46,6 +46,8 @@
public boolean isSearchable();
public boolean isOmitNorms();
public boolean isDocValue();
public String getComment();
@@ -523,7 +523,7 @@ public long getCountByQuery(String querystring) throws IOException {
params.setFacetMinCount(1); // there are many 0-count facets in the uninverted index cache
params.setFacetLimit(maxresults);
params.setFacetSort(FacetParams.FACET_SORT_COUNT);
params.setParam(FacetParams.FACET_METHOD, FacetParams.FACET_METHOD_fc /*FACET_METHOD_fcs*/);
params.setParam(FacetParams.FACET_METHOD, FacetParams.FACET_METHOD_enum); // fight the fieldcache
params.setFields(fields);
params.clearSorts();
params.setIncludeScore(false);
@@ -461,10 +461,10 @@ private SolrQuery getBasicParams(boolean getFacets, List<String> fqs) {
params.setFacetMinCount(1);
params.setFacetLimit(FACETS_STANDARD_MAXCOUNT);
params.setFacetSort(FacetParams.FACET_SORT_COUNT);
params.setParam(FacetParams.FACET_METHOD, FacetParams.FACET_METHOD_fcs);
params.setParam(FacetParams.FACET_METHOD, FacetParams.FACET_METHOD_enum); // fight the fieldcache
for (String field: this.facetfields) params.addFacetField("{!ex=" + field + "}" + field); // params.addFacetField("{!ex=" + field + "}" + field);
if (this.facetfields.contains(CollectionSchema.dates_in_content_dts.name())) {
params.setParam("facet.range", CollectionSchema.dates_in_content_dts.name());
params.setParam(FacetParams.FACET_RANGE, CollectionSchema.dates_in_content_dts.name());
String start = TrieDateField.formatExternal(new Date(System.currentTimeMillis() - 1000L * 60L * 60L * 24L * 3));
String end = TrieDateField.formatExternal(new Date(System.currentTimeMillis() + 1000L * 60L * 60L * 24L * 3));
params.setParam("f." + CollectionSchema.dates_in_content_dts.getSolrFieldName() + ".facet.range.start", start);
@@ -252,7 +252,7 @@
private String solrFieldName = null; // solr field name in custom solr schema, defaults to solcell schema field name (= same as this.name() )
private final SolrType type;
private final boolean indexed, stored, searchable, multiValued, omitNorms;
private final boolean indexed, stored, searchable, multiValued, omitNorms, docValues;
private String comment;
private CollectionSchema(final SolrType type, final boolean indexed, final boolean stored, final boolean multiValued, final boolean omitNorms, final boolean searchable, final String comment) {
@@ -263,6 +263,7 @@ private CollectionSchema(final SolrType type, final boolean indexed, final boole
this.omitNorms = omitNorms;
this.searchable = searchable;
this.comment = comment;
this.docValues = (type == SolrType.string || type == SolrType.date);
// verify our naming scheme
String name = this.name();
int p = name.indexOf('_');
@@ -336,6 +337,11 @@ public final boolean isOmitNorms() {
public final boolean isSearchable() {
return this.searchable;
}
/**
 * Reports whether docValues are enabled for this collection schema field.
 * The flag is assigned once in the constructor, where it is set for fields
 * whose type is {@code SolrType.string} or {@code SolrType.date}.
 * Declared {@code final} to match the other accessors of this schema.
 *
 * @return {@code true} if this field has docValues enabled
 */
@Override
public final boolean isDocValue() {
    return this.docValues;
}
@Override
public final String getComment() {
@@ -105,7 +105,7 @@
private String solrFieldName = null; // solr field name in custom solr schema
private final SolrType type;
private final boolean indexed, stored, multiValued, omitNorms, searchable;
private final boolean indexed, stored, multiValued, omitNorms, searchable, docValues;
private String comment;
@@ -117,6 +117,7 @@ private WebgraphSchema(final SolrType type, final boolean indexed, final boolean
this.omitNorms = omitNorms;
this.searchable = searchable;
this.comment = comment;
this.docValues = (type == SolrType.string || type == SolrType.date);
// verify our naming scheme
String name = this.name();
int p = name.indexOf('_');
@@ -190,6 +191,11 @@ public final boolean isSearchable() {
return this.searchable;
}
/**
 * Reports whether docValues are enabled for this webgraph schema field.
 * The flag is assigned once in the constructor, where it is set for fields
 * whose type is {@code SolrType.string} or {@code SolrType.date}.
 * Declared {@code final} to match the other accessors of this schema.
 *
 * @return {@code true} if this field has docValues enabled
 */
@Override
public final boolean isDocValue() {
    return this.docValues;
}
@Override
public final String getComment() {
return this.comment;

0 comments on commit 87e4abe

Please sign in to comment.