Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Issue99 #1946

Open
wants to merge 13 commits into
base: main
Choose a base branch
from
59 changes: 56 additions & 3 deletions chrome/content/zotero/xpcom/recognizePDF.js
Expand Up @@ -460,7 +460,10 @@ Zotero.RecognizePDF = new function () {
Zotero.debug('RecognizePDF: ' + e);
}
}


if (!res.doi) {
res.doi = _extractDOIFromFileName(fileName);
}
if (res.doi) {
Zotero.debug(`RecognizePDF: Getting metadata for DOI (${res.doi})`);
let translate = new Zotero.Translate.Search();
Expand Down Expand Up @@ -489,7 +492,10 @@ Zotero.RecognizePDF = new function () {
Zotero.debug("RecognizePDF: No translators found");
}
}


if (!res.isbn) {
res.isbn = _extractISBNFromFileName(fileName);
}
if (res.isbn) {
Zotero.debug(`RecognizePDF: Getting metadata by ISBN ${res.isbn}`);
let translate = new Zotero.Translate.Search();
Expand Down Expand Up @@ -561,6 +567,9 @@ Zotero.RecognizePDF = new function () {
newItem.setCreators(creators);

if (res.abstract) newItem.setField('abstractNote', res.abstract);
if (!res.year) {
res.year = _extractYearFromFileName(fileName);
}
if (res.year) newItem.setField('date', res.year);
if (res.pages) newItem.setField('pages', res.pages);
if (res.volume) newItem.setField('volume', res.volume);
Expand All @@ -585,7 +594,51 @@ Zotero.RecognizePDF = new function () {

return null;
}


/**
 * Tries to extract a DOI from a filename.
 *
 * The file extension (everything from the last `.`) is stripped, the first
 * `@` is turned back into `/`, and the result is handed to
 * Zotero.Utilities.cleanDOI, which does the actual DOI detection.
 *
 * @param {String} fileName - Filename to examine
 * @return {String|null} - Found DOI or null if nothing found.
 */
function _extractDOIFromFileName(fileName) {
	if (typeof fileName !== "string") {
		return null;
	}
	// Strip off file extension. A name without any `.` has no extension, so
	// keep it whole (substring(0, -1) would otherwise yield an empty string).
	let extensionStart = fileName.lastIndexOf(".");
	let strippedName = extensionStart === -1
		? fileName
		: fileName.substring(0, extensionStart);
	// Replace `@` sign with `/` (`@` sign commonly used because `/` not
	// allowed in filenames).
	strippedName = strippedName.replace("@", "/");
	let foundDOI = Zotero.Utilities.cleanDOI(strippedName);
	if (!foundDOI) {
		return null;
	}
	Zotero.debug(`Found DOI ${foundDOI} in filename`);
	return foundDOI;
}

/**
 * Tries to extract an ISBN from a filename.
 *
 * The filename is passed unchanged to Zotero.Utilities.cleanISBN, which does
 * the actual ISBN detection. Any falsy result from the cleaner is normalised
 * to null so the return value matches the documented contract.
 *
 * @param {String} fileName - Filename to examine
 * @return {String|null} - Found ISBN or null if nothing found.
 */
function _extractISBNFromFileName(fileName) {
	let isbnString = Zotero.Utilities.cleanISBN(fileName);
	if (!isbnString) {
		// Nothing found: stay quiet rather than logging a bogus "Found ISBN"
		return null;
	}
	Zotero.debug(`Found ISBN ${isbnString} in filename`);
	return isbnString;
}

/**
 * Tries to extract a year from a filename.
 *
 * A year is a four-digit number beginning with 19 or 20 that is either at the
 * very start of the filename or preceded by a non-digit character. The first
 * such occurrence wins.
 *
 * @param {String} fileName - Filename to examine
 * @return {String|null} - Found year or null if nothing found.
 */
function _extractYearFromFileName(fileName) {
	if (typeof fileName !== "string") {
		return null;
	}
	// `(?:^|\D)` rather than a bare `\D`, so that a filename which begins
	// with the year (e.g. "2019_smith.pdf") is matched as well.
	let yearRe = /(?:^|\D)((?:19|20)\d{2})/;
	let yearRes = fileName.match(yearRe);
	if (!yearRes) {
		return null;
	}
	// Index 0 is the whole match; index 1 is the first captured group (the year)
	let yearString = yearRes[1];
	Zotero.debug(`Found year ${yearString} in filename`);
	return yearString;
}

/**
* To customize the recognizer endpoint, set either recognize.url (used directly)
* or services.url (used with a 'recognizer/' suffix).
Expand Down