Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Issue/vivo 3606 : add language-specific sorting and label fields to search index #321

Merged
merged 13 commits into from Aug 23, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
Expand Up @@ -6,16 +6,19 @@
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;

import edu.cornell.mannlib.vitro.webapp.web.templatemodels.individuallist.ListedIndividualBuilder;
import javax.servlet.annotation.WebServlet;

import org.apache.commons.lang3.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

import edu.cornell.mannlib.vitro.webapp.beans.Individual;
import edu.cornell.mannlib.vitro.webapp.beans.VClass;
import edu.cornell.mannlib.vitro.webapp.beans.VClassGroup;
import edu.cornell.mannlib.vitro.webapp.config.ConfigurationProperties;
import edu.cornell.mannlib.vitro.webapp.controller.VitroRequest;
import edu.cornell.mannlib.vitro.webapp.controller.freemarker.responsevalues.ExceptionResponseValues;
import edu.cornell.mannlib.vitro.webapp.controller.freemarker.responsevalues.ResponseValues;
Expand All @@ -27,8 +30,7 @@
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchQuery;
import edu.cornell.mannlib.vitro.webapp.utils.searchengine.SearchQueryUtils;
import edu.cornell.mannlib.vitro.webapp.web.templatemodels.individuallist.ListedIndividual;

import javax.servlet.annotation.WebServlet;
import edu.cornell.mannlib.vitro.webapp.web.templatemodels.individuallist.ListedIndividualBuilder;

/**
* Generates a list of individuals for display in a template
Expand All @@ -43,6 +45,7 @@ public class IndividualListController extends FreemarkerHttpServlet {
private static final int MAX_PAGES = 40; // must be even

private static final String TEMPLATE_DEFAULT = "individualList.ftl";
private static final String LANGUAGE_FILTER_PROPERTY = "RDFService.languageFilter";

@Override
protected ResponseValues processRequest(VitroRequest vreq) {
Expand Down Expand Up @@ -152,12 +155,12 @@ public static int getPageParameter(VitroRequest request) {
return SearchQueryUtils.getPageParameter(request);
}

public static IndividualListResults getResultsForVClass(String vclassURI, int page, String alpha, VitroRequest vreq)
public static IndividualListResults getResultsForVClass(String vclassURI,
int page, String alpha, VitroRequest vreq)
throws SearchException{
try{
List<String> classUris = Collections.singletonList(vclassURI);
IndividualListQueryResults results = buildAndExecuteVClassQuery(classUris, alpha, page, INDIVIDUALS_PER_PAGE, vreq.getWebappDaoFactory().getIndividualDao());
return getResultsForVClassQuery(results, page, INDIVIDUALS_PER_PAGE, alpha, vreq);
List<String> classUris = Collections.singletonList(vclassURI);
return buildAndExecuteVClassQuery(classUris, page, INDIVIDUALS_PER_PAGE, alpha, vreq);
} catch (SearchEngineException e) {
String msg = "An error occurred retrieving results for vclass query";
log.error(msg, e);
Expand All @@ -169,16 +172,27 @@ public static IndividualListResults getResultsForVClass(String vclassURI, int pa
}
}

public static IndividualListResults getResultsForVClassIntersections(List<String> vclassURIs, int page, int pageSize, String alpha, VitroRequest vreq) {
public static IndividualListResults getResultsForVClassIntersections(
List<String> classUris, int page, int pageSize, String alpha, VitroRequest vreq) {
try{
IndividualListQueryResults results = buildAndExecuteVClassQuery(vclassURIs, alpha, page, pageSize, vreq.getWebappDaoFactory().getIndividualDao());
return getResultsForVClassQuery(results, page, pageSize, alpha, vreq);
return buildAndExecuteVClassQuery(classUris, page, pageSize, alpha, vreq);
} catch(Throwable th) {
log.error("Error retrieving individuals corresponding to intersection multiple classes." + vclassURIs.toString(), th);
log.error("Error retrieving individuals corresponding to intersection multiple classes." + classUris.toString(), th);
return IndividualListResults.EMPTY;
}
}

/**
 * Runs the class-membership search for one page of results and wraps the
 * outcome for display.
 *
 * The request locale is handed to the query only when the configuration
 * property named by LANGUAGE_FILTER_PROPERTY parses as "true"; otherwise
 * null is passed in its place.
 */
private static IndividualListResults buildAndExecuteVClassQuery(List<String> classUris, int page, int pageSize,
        String alpha, VitroRequest vreq) throws SearchEngineException {
    String filterSetting = ConfigurationProperties.getBean(vreq).getProperty(LANGUAGE_FILTER_PROPERTY, "false");
    Locale queryLocale = null;
    if (Boolean.parseBoolean(filterSetting)) {
        queryLocale = vreq.getLocale();
    }
    IndividualDao individualDao = vreq.getWebappDaoFactory().getIndividualDao();
    IndividualListQueryResults queryResults = buildAndExecuteVClassQuery(classUris, alpha, queryLocale, page,
            pageSize, individualDao);
    return getResultsForVClassQuery(queryResults, page, pageSize, alpha, vreq);
}

public static IndividualListResults getRandomResultsForVClass(String vclassURI, int page, int pageSize, VitroRequest vreq) {
try{
List<String> classUris = Collections.singletonList(vclassURI);
Expand All @@ -201,9 +215,10 @@ private static IndividualListResults getResultsForVClassQuery(IndividualListQuer


private static IndividualListQueryResults buildAndExecuteVClassQuery(
List<String> vclassURIs, String alpha, int page, int pageSize, IndividualDao indDao)
List<String> vclassURIs, String alpha, Locale locale, int page,
int pageSize, IndividualDao indDao)
throws SearchEngineException {
SearchQuery query = SearchQueryUtils.getQuery(vclassURIs, alpha, page, pageSize);
SearchQuery query = SearchQueryUtils.getQuery(vclassURIs, alpha, locale, page, pageSize);
IndividualListQueryResults results = IndividualListQueryResults.runQuery(query, indDao);
log.debug("Executed search query for " + vclassURIs);
if (results.getIndividuals().isEmpty()) {
Expand Down
Expand Up @@ -70,5 +70,11 @@ public class VitroSearchTermNames {

/** Source institution name */
public static final String SITE_NAME = "siteName";

/** Multilingual sort field suffix */
public static final String LABEL_SORT_SUFFIX = "_label_sort";

/** Multilingual label field suffix */
public static final String LABEL_DISPLAY_SUFFIX = "_label_display";

}
Expand Up @@ -36,6 +36,7 @@
import edu.cornell.mannlib.vitro.webapp.rdfservice.RDFService;
import edu.cornell.mannlib.vitro.webapp.rdfservice.impl.RDFServiceUtils;
import edu.cornell.mannlib.vitro.webapp.search.VitroSearchTermNames;
import edu.cornell.mannlib.vitro.webapp.utils.searchengine.SearchQueryUtils;

/**
* AutocompleteController generates autocomplete content
Expand Down Expand Up @@ -127,7 +128,10 @@ else if ( parts.length == 1 ) {
for (SearchResultDocument doc : docs) {
try {
String uri = doc.getStringValue(VitroSearchTermNames.URI);
String name = doc.getStringValue(VitroSearchTermNames.NAME_RAW);
String name = doc.getStringValue(SearchQueryUtils.getLabelFieldNameForLocale(vreq.getLocale()));
if (name == null) {
name = doc.getStringValue(VitroSearchTermNames.NAME_RAW);
}
//There may be multiple most specific types, sending them all back
String mst = doc.getStringValue(VitroSearchTermNames.MOST_SPECIFIC_TYPE_URIS);
//Assuming these will get me string values
Expand Down Expand Up @@ -184,7 +188,9 @@ private SearchQuery getQuery(String queryStr, VitroRequest vreq) {
addFilterQuery(query, typeParam, multipleTypesParam);
}

query.addFields(VitroSearchTermNames.NAME_RAW, VitroSearchTermNames.URI, VitroSearchTermNames.MOST_SPECIFIC_TYPE_URIS); // fields to retrieve
query.addFields(SearchQueryUtils.getLabelFieldNameForLocale(vreq.getLocale()),
VitroSearchTermNames.NAME_RAW, VitroSearchTermNames.URI,
VitroSearchTermNames.MOST_SPECIFIC_TYPE_URIS); // fields to retrieve

// Can't sort on multivalued field, so we sort the results in Java when we get them.
// query.addSortField(VitroSearchTermNames.NAME_LOWERCASE, Order.ASC);
Expand Down
Expand Up @@ -5,8 +5,8 @@
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.Set;

Expand All @@ -21,7 +21,7 @@ public class BaseSearchQuery implements SearchQuery {
private int rows = -1;

private final Set<String> fieldsToReturn = new HashSet<>();
private final Map<String, SearchQuery.Order> sortFields = new HashMap<>();
private final Map<String, SearchQuery.Order> sortFields = new LinkedHashMap <>();
private final Set<String> filters = new HashSet<>();

private final Set<String> facetFields = new HashSet<>();
Expand Down
@@ -0,0 +1,72 @@
package edu.cornell.mannlib.vitro.webapp.searchengine.solr;

import static edu.cornell.mannlib.vitro.webapp.search.VitroSearchTermNames.LABEL_DISPLAY_SUFFIX;
import static edu.cornell.mannlib.vitro.webapp.search.VitroSearchTermNames.LABEL_SORT_SUFFIX;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;

import org.apache.solr.client.solrj.SolrClient;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.impl.ConcurrentUpdateSolrClient;
import org.apache.solr.client.solrj.request.schema.SchemaRequest;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.client.solrj.response.schema.SchemaResponse;
import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.util.SimpleOrderedMap;

/**
 * Makes sure the dynamic fields used for language-specific labels exist in
 * the Solr schema.
 *
 * For each of LABEL_SORT_SUFFIX and LABEL_DISPLAY_SUFFIX the current schema is
 * inspected; a dynamic field named "*" + suffix is added when no existing
 * dynamic field or field has a name ending with that suffix.
 */
public class SolrFieldInitializer {

    /**
     * Checks the schema for the label suffixes and creates whichever dynamic
     * fields are still missing.
     *
     * @param queryEngine  client used to read the current schema
     * @param updateEngine client used to submit the add-dynamic-field requests
     * @throws Exception if a schema request fails or a field cannot be created
     */
    static void initializeFields(SolrClient queryEngine, ConcurrentUpdateSolrClient updateEngine) throws Exception {
        Set<String> fieldSuffixes = new HashSet<>(Arrays.asList(LABEL_SORT_SUFFIX, LABEL_DISPLAY_SUFFIX));
        excludeMatchedFields(fieldSuffixes, queryEngine, "dynamicFields");
        excludeMatchedFields(fieldSuffixes, queryEngine, "fields");
        createMissingFields(fieldSuffixes, updateEngine);
    }

    /**
     * Adds one dynamic field per remaining suffix.
     *
     * @throws IllegalStateException if Solr answers with a non-zero status
     * @throws Exception             if the request itself fails
     */
    private static void createMissingFields(Set<String> fieldSuffixes, ConcurrentUpdateSolrClient updateEngine)
            throws Exception {
        for (String suffix : fieldSuffixes) {
            SchemaRequest.AddDynamicField request = new SchemaRequest.AddDynamicField(getFieldAttributes(suffix));
            SchemaResponse.UpdateResponse response = request.process(updateEngine);
            int status = response.getStatus();
            if (status != 0) {
                throw new IllegalStateException(
                        "Creation of missing solr field '*" + suffix + "' failed (status " + status + ")");
            }
        }
    }

    /** Builds the attribute map describing the dynamic field "*" + suffix. */
    private static Map<String, Object> getFieldAttributes(String suffix) {
        Map<String, Object> fieldAttributes = new HashMap<>();
        fieldAttributes.put("type", "string");
        fieldAttributes.put("stored", "true");
        fieldAttributes.put("indexed", "true");
        fieldAttributes.put("name", "*" + suffix);
        return fieldAttributes;
    }

    /**
     * Removes from fieldSuffixes every suffix that is already covered by an
     * entry of the given schema section.
     *
     * @param fieldSuffixes suffixes still considered missing; modified in place
     * @param queryEngine   client used to read the schema
     * @param fieldType     schema section to read ("fields" or "dynamicFields");
     *                      also the key under which the entries are returned
     */
    private static void excludeMatchedFields(Set<String> fieldSuffixes, SolrClient queryEngine, String fieldType)
            throws Exception {
        if (fieldSuffixes.isEmpty()) {
            // Nothing left to look for, so the schema request can be skipped.
            return;
        }
        SolrQuery query = new SolrQuery();
        query.add(CommonParams.QT, "/schema/" + fieldType.toLowerCase());
        QueryResponse response = queryEngine.query(query);
        ArrayList<?> fieldList = (ArrayList<?>) response.getResponse().get(fieldType);
        if (fieldList == null) {
            return;
        }
        for (Object entry : fieldList) {
            Object name = ((SimpleOrderedMap<?>) entry).get("name");
            // Entries without a String name cannot match any suffix; skip them
            // instead of failing with a NullPointerException.
            if (name instanceof String) {
                String fieldName = (String) name;
                fieldSuffixes.removeIf(fieldName::endsWith);
            }
        }
    }

}
Expand Up @@ -76,7 +76,9 @@ public void startup(Application application, ComponentStartupStatus css) {
// no apparent 7.4.0 analogy to `setPollQueueTime(25)`

updateEngine = updateBuilder.build();


SolrFieldInitializer.initializeFields(queryEngine, updateEngine);

css.info("Set up the Solr search engine; URL = '" + solrServerUrlString + "'.");
} catch (Exception e) {
css.fatal("Could not set up the Solr search engine", e);
Expand Down
Expand Up @@ -52,13 +52,13 @@ public class SelectQueryDocumentModifier implements DocumentModifier,
private static final Log log = LogFactory
.getLog(SelectQueryDocumentModifier.class);

private RDFService rdfService;
protected RDFService rdfService;

/** A name to be used in logging, to identify this instance. */
private String label;
protected String label;

/** The queries to be executed. There must be at least one. */
private List<String> queries = new ArrayList<>();
protected List<String> queries = new ArrayList<>();

/**
* The names of the fields where the results of the queries will be stored.
Expand Down Expand Up @@ -128,7 +128,7 @@ public void modifyDocument(Individual ind, SearchInputDocument doc) {
}
}

private boolean passesTypeRestrictions(Individual ind) {
protected boolean passesTypeRestrictions(Individual ind) {
if (typeRestrictions.isEmpty()) {
return true;
} else {
Expand Down
@@ -0,0 +1,111 @@
/* $This file is distributed under the terms of the license in LICENSE$ */

package edu.cornell.mannlib.vitro.webapp.searchindex.documentBuilding;

import static edu.cornell.mannlib.vitro.webapp.utils.sparqlrunner.SparqlQueryRunner.createSelectQueryContext;
import static edu.cornell.mannlib.vitro.webapp.i18n.selection.LocaleSelectionSetup.PROPERTY_SELECTABLE_LOCALES;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.commons.lang3.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

import edu.cornell.mannlib.vitro.webapp.beans.Individual;
import edu.cornell.mannlib.vitro.webapp.config.ConfigurationProperties;
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchInputDocument;
import edu.cornell.mannlib.vitro.webapp.rdfservice.filter.LanguageFilteringRDFService;
import edu.cornell.mannlib.vitro.webapp.utils.configuration.ConfigurationReader;
import edu.cornell.mannlib.vitro.webapp.utils.configuration.ContextModelsUser;
import edu.cornell.mannlib.vitro.webapp.utils.configuration.Property;
import edu.cornell.mannlib.vitro.webapp.utils.sparqlrunner.QueryHolder;

/**
 * A variation on SelectQueryDocumentModifier where only the suffix of the
 * target field is defined. Each query is performed once for each of the
 * locales configured in runtime.properties.
 *
 * Target field names are composed of locale + fieldSuffix.
 *
 * Each query should contain a ?uri variable, which will be replaced by the URI
 * of the individual.
 *
 * All of the other result fields in each row of each query will be converted to
 * strings and added to the field.
 *
 */
public class SelectQueryI18nDocumentModifier extends SelectQueryDocumentModifier
        implements DocumentModifier, ContextModelsUser, ConfigurationReader {
    private static final Log log = LogFactory.getLog(SelectQueryI18nDocumentModifier.class);

    /** Suffix appended to the locale to form the target field name; blank disables this modifier. */
    private String fieldSuffix = "";

    /** Locales to query for, in configuration order, without duplicates. */
    private final List<String> locales = new ArrayList<>();

    @Property(uri = "http://vitro.mannlib.cornell.edu/ns/vitro/ApplicationSetup#hasTargetSuffix")
    public void setTargetSuffix(String fieldSuffix) {
        this.fieldSuffix = fieldSuffix;
    }

    /**
     * Adds one field per locale and query to the document, named
     * locale + fieldSuffix. Does nothing when the individual fails the type
     * restrictions or no suffix has been configured.
     */
    @Override
    public void modifyDocument(Individual ind, SearchInputDocument doc) {
        if (!passesTypeRestrictions(ind) || StringUtils.isBlank(fieldSuffix)) {
            return;
        }
        for (Map<String, List<String>> valuesByLocale : getTextForQueries(ind)) {
            for (Map.Entry<String, List<String>> entry : valuesByLocale.entrySet()) {
                doc.addField(entry.getKey() + fieldSuffix, entry.getValue());
            }
        }
    }

    /**
     * Runs every configured query for the individual.
     *
     * @return one locale-to-values map per query, in query order
     */
    protected List<Map<String, List<String>>> getTextForQueries(Individual ind) {
        List<Map<String, List<String>>> list = new ArrayList<>();
        for (String query : queries) {
            list.add(getTextForQuery(query, ind));
        }
        return list;
    }

    /**
     * Runs a single query once per configured locale, each time through a
     * LanguageFilteringRDFService restricted to that locale.
     *
     * @return a map from locale to the flattened string results; an empty map
     *         if anything goes wrong (the problem is logged)
     */
    protected Map<String, List<String>> getTextForQuery(String query, Individual ind) {
        try {
            QueryHolder queryHolder = new QueryHolder(query).bindToUri("uri", ind.getURI());
            Map<String, List<String>> mapLocaleToFields = new HashMap<>();
            for (String locale : locales) {
                LanguageFilteringRDFService lfrs = new LanguageFilteringRDFService(rdfService,
                        Collections.singletonList(locale));
                List<String> list = createSelectQueryContext(lfrs, queryHolder).execute().toStringFields().flatten();
                mapLocaleToFields.put(locale, list);
                log.debug(label + " for locale " + locale + " - query: '" + query + "' returns " + list);
            }
            return mapLocaleToFields;
        } catch (Throwable t) {
            log.error("problem while running query '" + query + "'", t);
            return Collections.emptyMap();
        }
    }

    /**
     * Reads the comma-separated list of selectable locales from the
     * configuration and registers each one, with "_" replaced by "-".
     */
    @Override
    public void setConfigurationProperties(ConfigurationProperties config) {
        String property = config.getProperty(PROPERTY_SELECTABLE_LOCALES);
        if (StringUtils.isBlank(property)) {
            return;
        }
        for (String value : property.trim().split("\\s*,\\s*")) {
            addLocale(value.replace("_", "-"));
        }
    }

    /**
     * Registers a locale unless it is blank or already present. A duplicate
     * would only repeat the same query and overwrite the same map entry.
     */
    private void addLocale(String localeString) {
        if (StringUtils.isBlank(localeString) || locales.contains(localeString)) {
            return;
        }
        locales.add(localeString);
    }

}
@@ -0,0 +1,13 @@
/* $This file is distributed under the terms of the license in LICENSE$ */

package edu.cornell.mannlib.vitro.webapp.utils.configuration;

import edu.cornell.mannlib.vitro.webapp.config.ConfigurationProperties;

/**
 * Implemented by classes that need access to the ConfigurationProperties.
 *
 * When the ConfigurationBeanLoader creates an instance of an implementing
 * class, it will call setConfigurationProperties, supplying the
 * ConfigurationProperties.
 */
public interface ConfigurationReader {
/**
 * Receives the configuration properties.
 *
 * @param properties the ConfigurationProperties supplied by the loader
 */
void setConfigurationProperties(ConfigurationProperties properties);
}