Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

eLibrary.ru: Update for site changes #3289

Merged
merged 3 commits into from
Jun 24, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
146 changes: 89 additions & 57 deletions eLibrary.ru.js
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
"inRepository": true,
"translatorType": 4,
"browserSupport": "gcsibv",
"lastUpdated": "2023-04-20 20:02:47"
"lastUpdated": "2024-06-24 19:34:39"
}

/*
Expand Down Expand Up @@ -46,31 +46,28 @@ function detectWeb(doc, url) {
return false;
}

function doWeb(doc, url) {
var articles = [];
async function doWeb(doc, url) {
if (detectWeb(doc, url) == "multiple") {
var results = ZU.xpath(doc, '//table[@id="restab"]/tbody/tr[starts-with(@id, "arw")]/td[2]');
// Zotero.debug('results.length: ' + results.length);
var items = {};
for (let i = 0; i < results.length; i++) {
// Zotero.debug('result [' + i + '] text: ' + results[i].textContent);
var title = ZU.xpathText(results[i], './a');
var uri = ZU.xpathText(results[i], ' ./a/@href');
var title = ZU.xpathText(results[i], './/a');
var uri = ZU.xpathText(results[i], ' .//a/@href');
if (!title || !uri) continue;
items[uri] = fixCasing(title);
}
Zotero.selectItems(items, function (items) {
if (!items) {
return;
}
for (let i in items) {
articles.push(i);
}
Zotero.Utilities.processDocuments(articles, scrape);
});
items = await Zotero.selectItems(items);
if (!items) {
return;
}
for (let url of Object.keys(items)) {
await scrape(await requestDocument(url));
}
}
else {
scrape(doc, url);
await scrape(doc, url);
}
}

Expand Down Expand Up @@ -103,6 +100,9 @@ function getDocType(doc) {
case "публикация в сборнике трудов конференции":
itemType = "conferencePaper";
break;
case "тезисы доклада на конференции":
itemType = "conferencePaper";
break;
default:
Zotero.debug("Unknown type: " + docType + ". Using 'journalArticle'");
itemType = "journalArticle";
Expand All @@ -111,21 +111,57 @@ function getDocType(doc) {
return itemType;
}

function scrape(doc, url) {
async function scrape(doc, url = doc.location.href) {
if (doc.querySelector('.help.pointer') && !doc.querySelector('.help.pointer[title]')) {
// Full author names are in the HTML at page load but are stripped and replaced with
// JS tooltips. Try to reload the page and see if we can get the tooltips. If we
// still get a page without tooltips, we might've hit a captcha (seems to commonly
// happen when requesting from a US IP), so don't worry about it.
Zotero.debug('Re-requesting to get original HTML');
try {
let newDoc = await requestDocument(url, {
headers: { Referer: url }
});
if (newDoc.querySelector('.help.pointer[title]')) {
doc = newDoc;
}
else {
Zotero.debug('Hit a captcha? ' + newDoc.location.href);
}
}
catch (e) {
Zotero.debug('Failed: ' + e);
}
}

var item = new Zotero.Item();
item.itemType = getDocType(doc);
item.title = fixCasing(doc.title);
item.url = url;

var rightPart = doc.getElementById("leftcol").nextSibling;
var centralColumn = ZU.xpath(rightPart, './table/tbody/tr[2]/td[@align="left"]');
var datablock = ZU.xpath(centralColumn, './div[1]');
var datablock = ZU.xpath(centralColumn, './div[2]');

var authors = ZU.xpath(datablock, './/table[1]//b');
var authors = ZU.xpath(datablock, './/table[1]/tbody/tr/td[2]//b');
// Zotero.debug('authors.length: ' + authors.length);

for (let i = 0; i < authors.length; i++) {
var dirty = authors[i].textContent;
for (let author of authors) {
let dirty = author.textContent;
try {
let tooltipParent = author.closest('.help.pointer[title]');
if (tooltipParent) {
let tooltipHTML = tooltipParent.getAttribute('title');
let tooltipAuthorName = text(new DOMParser().parseFromString(tooltipHTML, 'text/html'), 'font');
if (tooltipAuthorName) {
dirty = tooltipAuthorName;
}
}
}
catch (e) {
Zotero.debug(e);
}

// Zotero.debug('author[' + i + '] text: ' + dirty);

/* Common author field formats are:
Expand Down Expand Up @@ -182,6 +218,7 @@ function scrape(doc, url) {
Номер: "issue",
ISSN: "ISSN",
"Число страниц": "pages", // e.g. "83"
Страницы: "pages",
Язык: "language",
"Место издания": "place"
};
Expand Down Expand Up @@ -255,7 +292,6 @@ function scrape(doc, url) {
item.complete();
}


/** BEGIN TEST CASES **/
var testCases = [
{
Expand All @@ -269,7 +305,7 @@ var testCases = [
"items": [
{
"itemType": "journalArticle",
"title": "Иноязычные заимствования в художественной прозе на иврите в XX в",
"title": "Иноязычные заимствования в художественной прозе на иврите в XX в.",
"creators": [
{
"firstName": "М. В.",
Expand Down Expand Up @@ -301,27 +337,27 @@ var testCases = [
"title": "Использование Молекулярно-Генетических Методов Установления Закономерностей Наследования Для Выявления Доноров Значимых Признаков Яблони",
"creators": [
{
"firstName": "Иван Иванович",
"firstName": "И. И.",
"lastName": "Супрун",
"creatorType": "author"
},
{
"firstName": "Елена Владимировна",
"firstName": "Е. В.",
"lastName": "Ульяновская",
"creatorType": "author"
},
{
"firstName": "Евгений Николаевич",
"firstName": "Е. Н.",
"lastName": "Седов",
"creatorType": "author"
},
{
"firstName": "Галина Алексеевна",
"firstName": "Г. А.",
"lastName": "Седышева",
"creatorType": "author"
},
{
"firstName": "Зоя Михайловна",
"firstName": "З. М.",
"lastName": "Серова",
"creatorType": "author"
}
Expand Down Expand Up @@ -455,14 +491,14 @@ var testCases = [
},
{
"type": "web",
"url": "https://elibrary.ru/item.asp?id=20028198",
"url": "https://www.elibrary.ru/item.asp?id=20028198",
"items": [
{
"itemType": "book",
"title": "Аппарат издания и правила оформления",
"creators": [
{
"firstName": "Людмила Павловна",
"firstName": "Л. П.",
"lastName": "Стычишина",
"creatorType": "author"
},
Expand All @@ -475,7 +511,7 @@ var testCases = [
"language": "ru",
"libraryCatalog": "eLibrary.ru",
"publisher": "Изд-во Политехнического университета",
"url": "https://elibrary.ru/item.asp?id=20028198",
"url": "https://www.elibrary.ru/item.asp?id=20028198",
"attachments": [],
"tags": [
{
Expand Down Expand Up @@ -537,7 +573,7 @@ var testCases = [
"date": "2019",
"DOI": "10.31857/S0869-56524863275-279",
"ISSN": "0869-5652",
"abstractNote": "Для классов графиков -отображений нильпотентных градуированных групп доказана формула площади на сублоренцевых структурах произвольной глубины с многомерным временем.",
"abstractNote": "Для классов графиков - отображений нильпотентных градуированных групп доказана формула площади на сублоренцевых структурах произвольной глубины с многомерным временем.",
"issue": "3",
"language": "ru",
"libraryCatalog": "eLibrary.ru",
Expand All @@ -547,27 +583,6 @@ var testCases = [
"volume": "486",
"attachments": [],
"tags": [
{
"tag": "Contact Mapping"
},
{
"tag": "Graph-Mapping"
},
{
"tag": "Intrinsic Basis"
},
{
"tag": "Multidimensional Time"
},
{
"tag": "Nilpotent Graded Group"
},
{
"tag": "Sub-Lorentzian Structure"
},
{
"tag": "Surface Area"
},
{
"tag": "Внутренний Базис"
},
Expand Down Expand Up @@ -736,7 +751,7 @@ var testCases = [
},
{
"type": "web",
"url": "https://www.elibrary.ru/item.asp?id=22208210",
"url": "https://elibrary.ru/item.asp?id=22208210",
Comment on lines -739 to +754
Copy link
Contributor

@alex-ter alex-ter Apr 7, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is the opposite of all the other test URL changes. When opening those URLs manually, I see that the www. part is actually not needed and is stripped by the web server. In view of that, this particular change seems meaningful, while the opposite ones (adding the www.) are questionable. Why are those needed?

Copy link
Contributor

@alex-ter alex-ter Apr 7, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Edit note: if you read an earlier version of this comment (in the email notification or on the web page), please disregard it; I updated it significantly based on further analysis.

I ran a couple of tests using Scaffold in both Z6 and Z7 (now with your changes applied, unlike the first time I wrote this comment - sorry for the noise), and it looks like this is a Z6 peculiarity - it apparently "toggles" that www. part - adds it where it's absent and removes it where it's present, which is weird. Z7, on the contrary, only adds it but does not remove it. @AbeJellinek, I wonder whether this needs further analysis - what do you think? Z6 behaviour looks completely incorrect, but I'm not sure Z7's is fully correct either.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, that's right, but in my experience it is more random. For instance, you can run it several times and occasionally it'll change. The only diff I got was in the url, and not every time. I tried both Z6 and Z7; maybe you are right about Z7, but Z6 seems to do it randomly and only with this website - I haven't seen this happening for other translators. So I have usually assumed it is a site issue: first this toggling, then tests being blocked by a captcha.

"items": [
{
"itemType": "journalArticle",
Expand All @@ -750,6 +765,7 @@ var testCases = [
],
"date": "2013",
"ISSN": "0025-2344",
"abstractNote": "The present study extends the findings of Lynn (2010), who reported higher mean IQ in northern than southern Italy and of Templer (2012), who found biological correlates of IQ in the Italian regions. The present study found that murder and attempted murder rates were associated with Mediterranean/Mideastern characteristics (lower IQ, black hair, black eyes) and that lower murder rates were associated with central/northern European characteristics (higher cephalic index, blond hair, blue eyes, and higher multiple sclerosis and schizophrenia rates). The eye and hair color findings are consistent with the human and animal literature finding of darker coloration associated with greater aggression. © Copyright 2013.",
"issue": "1",
"language": "en",
"libraryCatalog": "eLibrary.ru",
Expand All @@ -758,15 +774,31 @@ var testCases = [
"url": "https://www.elibrary.ru/item.asp?id=22208210",
"volume": "54",
"attachments": [],
"tags": [],
"tags": [
{
"tag": "Eye Color"
},
{
"tag": "Hair Color"
},
{
"tag": "Iq"
},
{
"tag": "Italy"
},
{
"tag": "Murder"
}
],
"notes": [],
"seeAlso": []
}
]
},
{
"type": "web",
"url": "https://elibrary.ru/item.asp?id=35209757",
"url": "https://www.elibrary.ru/item.asp?id=35209757",
"items": [
{
"itemType": "journalArticle",
Expand All @@ -778,7 +810,7 @@ var testCases = [
"creatorType": "author"
},
{
"firstName": "Галина Георгиевна",
"firstName": "Г. Г.",
"lastName": "Харсеева",
"creatorType": "author"
},
Expand All @@ -802,7 +834,7 @@ var testCases = [
"libraryCatalog": "eLibrary.ru",
"pages": "375-378",
"publicationTitle": "Клиническая Лабораторная Диагностика",
"url": "https://elibrary.ru/item.asp?id=35209757",
"url": "https://www.elibrary.ru/item.asp?id=35209757",
"volume": "63",
"attachments": [],
"tags": [
Expand Down
Loading