Skip to content

Commit

Permalink
Merge pull request DSpace#9642 from DSpace/backport-9594-to-dspace-7_x
Browse files Browse the repository at this point in the history
[Port dspace-7_x] Metadata Import via Scopus API: improved handling of empty search results
  • Loading branch information
tdonohue committed Jun 7, 2024
2 parents e899199 + 8e56fdd commit 2bf663e
Show file tree
Hide file tree
Showing 4 changed files with 48 additions and 13 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ public Collection<MetadatumDTO> contributeMetadata(Element element) {
}

/**
* Retrieve the the ScopusID, orcid, author name and affiliationID
* Retrieve the ScopusID, orcid, author name and affiliationID
* metadata associated with the given element object.
* If the value retrieved from the element is empty,
* it is set to PLACEHOLDER_PARENT_METADATA_VALUE
Expand All @@ -82,11 +82,19 @@ public Collection<MetadatumDTO> contributeMetadata(Element element) {
private List<MetadatumDTO> getMetadataOfAuthors(Element element) throws JaxenException {
List<MetadatumDTO> metadatums = new ArrayList<MetadatumDTO>();
Element authname = element.getChild("authname", NAMESPACE);
Element surname = element.getChild("surname", NAMESPACE);
Element givenName = element.getChild("given-name", NAMESPACE);
Element scopusId = element.getChild("authid", NAMESPACE);
Element orcid = element.getChild("orcid", NAMESPACE);
Element afid = element.getChild("afid", NAMESPACE);

addMetadatum(metadatums, getMetadata(getElementValue(authname), this.authname));
if (authname != null) {
addMetadatum(metadatums, getMetadata(getElementValue(authname), this.authname));
} else {
addMetadatum(metadatums, getMetadata(getElementValue(surname) + ", " +
getElementValue(givenName), this.authname));
}

addMetadatum(metadatums, getMetadata(getElementValue(scopusId), this.scopusId));
addMetadatum(metadatums, getMetadata(getElementValue(orcid), this.orcid));
addMetadatum(metadatums, getMetadata(StringUtils.isNotBlank(afid.getValue())
Expand Down Expand Up @@ -170,4 +178,4 @@ public void setAffiliation(MetadataFieldConfig affiliation) {
this.affiliation = affiliation;
}

}
}
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
Expand All @@ -24,6 +25,8 @@
import javax.el.MethodNotFoundException;

import org.apache.commons.lang3.StringUtils;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.dspace.content.Item;
import org.dspace.importer.external.datamodel.ImportRecord;
import org.dspace.importer.external.datamodel.Query;
Expand Down Expand Up @@ -62,6 +65,8 @@ public class ScopusImportMetadataSourceServiceImpl extends AbstractImportMetadat
@Autowired
private LiveImportClient liveImportClient;

private final static Logger log = LogManager.getLogger();

/**
 * Returns the {@link LiveImportClient} currently held by this service.
 *
 * @return the value of the {@code liveImportClient} field
 */
public LiveImportClient getLiveImportClient() {
    return this.liveImportClient;
}
Expand Down Expand Up @@ -200,6 +205,9 @@ public Integer call() throws Exception {
Map<String, String> requestParams = getRequestParameters(query, null, null, null);
params.put(URI_PARAMETERS, requestParams);
String response = liveImportClient.executeHttpGetRequest(timeout, url, params);
if (StringUtils.isEmpty(response)) {
return 0;
}

SAXBuilder saxBuilder = new SAXBuilder();
// disallow DTD parsing to ensure no XXE attacks can occur
Expand Down Expand Up @@ -245,6 +253,10 @@ public List<ImportRecord> call() throws Exception {
Map<String, String> requestParams = getRequestParameters(queryString, viewMode, null, null);
params.put(URI_PARAMETERS, requestParams);
String response = liveImportClient.executeHttpGetRequest(timeout, url, params);
if (StringUtils.isEmpty(response)) {
return results;
}

List<Element> elements = splitToRecords(response);
for (Element record : elements) {
results.add(transformSourceRecords(record));
Expand Down Expand Up @@ -304,6 +316,10 @@ public List<ImportRecord> call() throws Exception {
Map<String, String> requestParams = getRequestParameters(queryString, viewMode, start, count);
params.put(URI_PARAMETERS, requestParams);
String response = liveImportClient.executeHttpGetRequest(timeout, url, params);
if (StringUtils.isEmpty(response)) {
return results;
}

List<Element> elements = splitToRecords(response);
for (Element record : elements) {
results.add(transformSourceRecords(record));
Expand Down Expand Up @@ -349,6 +365,10 @@ public List<ImportRecord> call() throws Exception {
Map<String, String> requestParams = getRequestParameters(queryString, viewMode, start, count);
params.put(URI_PARAMETERS, requestParams);
String response = liveImportClient.executeHttpGetRequest(timeout, url, params);
if (StringUtils.isEmpty(response)) {
return results;
}

List<Element> elements = splitToRecords(response);
for (Element record : elements) {
results.add(transformSourceRecords(record));
Expand Down Expand Up @@ -383,10 +403,16 @@ private List<Element> splitToRecords(String recordsSrc) {
saxBuilder.setFeature("http://apache.org/xml/features/disallow-doctype-decl",true);
Document document = saxBuilder.build(new StringReader(recordsSrc));
Element root = document.getRootElement();
List<Element> records = root.getChildren("entry",Namespace.getNamespace("http://www.w3.org/2005/Atom"));
String totalResults = root.getChildText("totalResults", Namespace.getNamespace("http://a9.com/-/spec/opensearch/1.1/"));
if (totalResults != null && "0".equals(totalResults)) {
log.debug("got Scopus API with empty response");
return Collections.emptyList();
}
List<Element> records = root.getChildren("entry", Namespace.getNamespace("http://www.w3.org/2005/Atom"));
return records;
} catch (JDOMException | IOException e) {
return new ArrayList<Element>();
log.warn("got unexpected XML response from Scopus API: " + e.getMessage());
return Collections.emptyList();
}
}

Expand Down Expand Up @@ -422,4 +448,4 @@ public void setInstKey(String instKey) {
this.instKey = instKey;
}

}
}
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ public void scopusImportMetadataGetRecordsCountTest() throws Exception {
}

@Test
public void scopusImportMetadataGetRecordsEmptyResponceTest() throws Exception {
public void scopusImportMetadataGetRecordsEmptyResponseTest() throws Exception {
context.turnOffAuthorisationSystem();
String originApiKey = scopusServiceImpl.getApiKey();
if (StringUtils.isBlank(originApiKey)) {
Expand All @@ -113,8 +113,7 @@ public void scopusImportMetadataGetRecordsEmptyResponceTest() throws Exception {

context.restoreAuthSystemState();
Collection<ImportRecord> recordsImported = scopusServiceImpl.getRecords("roma", 0, 20);
ImportRecord importedRecord = recordsImported.iterator().next();
assertTrue(importedRecord.getValueList().isEmpty());
assertTrue(recordsImported.isEmpty());
} finally {
liveImportClientImpl.setHttpClient(originalHttpClient);
scopusServiceImpl.setApiKey(originApiKey);
Expand Down Expand Up @@ -229,4 +228,4 @@ private ArrayList<ImportRecord> getRecords() {
return records;
}

}
}
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,11 @@
<opensearch:totalResults>0</opensearch:totalResults>
<opensearch:startIndex>0</opensearch:startIndex>
<opensearch:itemsPerPage>0</opensearch:itemsPerPage>
<opensearch:Query role="request" searchTerms="(probizna)" startPage="0"/>
<link ref="self" href="https://api.elsevier.com/content/search/scopus?start=0&amp;count=2&amp;query=%28testemptyresp%29&amp;view=COMPLETE" type="application/xml"/>
<opensearch:Query role="request" searchTerms="a-query-without-hits" startPage="0"/>
<link ref="self"
href="https://api.elsevier.com/content/search/scopus?start=0&amp;count=10&amp;query=a-query-without-hits&amp;httpAccept=application%2Fxml"
type="application/xml"/>
<entry>
<error>Result set was empty</error>
</entry>
</search-results>
</search-results>

0 comments on commit 2bf663e

Please sign in to comment.