In [None]:
import csv
import re
import unicodedata
from unidecode import unidecode

### Proof flag
Change the following to `IS_PROOF = False` if links should go to the public IsisCB site.

Change `INCLUDE_ALREADY_PRINTED` to `INCLUDE_ALREADY_PRINTED = True` if already printed publications should be included (publications that have a date in the column 'Published Print').

In [None]:
# True -> links use the curation ("proof") URL template; False -> the public citation URL template.
IS_PROOF = True
# True -> records are included even when their print date is non-empty.
INCLUDE_ALREADY_PRINTED = False
# NOTE(review): the two flags below are not read anywhere in this part of the notebook;
# presumably they switch off dataset / category filtering further down — confirm.
IGNORE_DATASET = False
IGNORE_CATEGORIES = False

DEV_MODE = True # True selects the development file paths; change this to False to use production file paths

In [None]:
# Name of the dataset to print.
# NOTE(review): not read in this part of the notebook; presumably compared against each
# record's database value further down — confirm.
DATABASE_TO_PRINT = "Isis Bibliography of the History of Science (Stephen P. Weldon, ed.)"


### File paths
Set the following paths to files that this notebook should read or write.
- `export_file`: path to file that contains the IsisCB export (should be a csv file)
- `result_file_path`: path to file that will contain all publications except book reviews
- `reviews_file_path`: path to file that will contain all book reviews
- `errors_file_path`: path to file that will contain all errors
- `author_index_file_path`: path to file that will contain the author index
- `subject_index_file_path`: path to file that will contain the subject index

If you run this notebook on a Windows machine, make sure to encode all backslashes in path names (e.g. `\` should be `\\`).

In [None]:
# Select the input/output locations depending on DEV_MODE.
# NOTE(review): both branches hard-code absolute, machine-specific paths; adjust them
# before running on another machine.
if DEV_MODE:
    # development locations
    export_file = '/Users/jdamerow/UpConsulting/files/latex/new-printdata.csv'  # input: IsisCB csv export
    result_file_path = '/Users/jdamerow/UpConsulting/files/latex/result1.txt'
    reviews_file_path = '/Users/jdamerow/UpConsulting/files/latex/reviews1.txt'
    errors_file_path = '/Users/jdamerow/UpConsulting/files/latex/errors1.csv'
    author_index_file_path = '/Users/jdamerow/UpConsulting/files/latex/index1.tex'
    subject_index_file_path = '/Users/jdamerow/UpConsulting/files/latex/subj_index.tex'
else:
    # production locations (Windows paths, backslashes escaped)
    export_file = "C:\\Users\\Stephen\\Dropbox (Univ. of Oklahoma)\\CB Directory\\PRINT\\printdata.csv"
    result_file_path = 'C:\\Users\\Stephen\\Dropbox (Univ. of Oklahoma)\\CB Directory\\PRINT\\biblist.tex'
    reviews_file_path = 'C:\\Users\\Stephen\\Dropbox (Univ. of Oklahoma)\\CB Directory\\PRINT\\bookreviews.tex'
    errors_file_path = 'C:\\Users\\Stephen\\Dropbox (Univ. of Oklahoma)\\CB Directory\\PRINT\\errors.csv'
    author_index_file_path = 'C:\\Users\\Stephen\\Dropbox (Univ. of Oklahoma)\\CB Directory\\PRINT\\author_index.tex'
    subject_index_file_path = 'C:\\Users\\Stephen\\Dropbox (Univ. of Oklahoma)\\CB Directory\\PRINT\\subj_index.tex'

In [None]:
# Identifiers of subjects to leave out.
# NOTE(review): the consumer of this list is outside this part of the notebook; presumably
# these ids are skipped when the subject index is built — confirm.
SUBJECTS_TO_EXCLUDE = [
    'CBA000113709'
]

### Code
The following cells contain the code that will turn the exported csv file into latex. Do not change unless you know what you are doing.

In [None]:
# %-format templates shared by the renderers below.
# Person name: (last, first) -> small-caps last name, comma, first name.
author_template = "\\textsc{%s}, %s"
# Non-person (organization) name: rendered whole in small caps.
author_template_organization = "\\textsc{%s}"
isbn_template = "\\textsc{isbn}: %s."
# Link templates take the citation id twice: once for the URL, once as the link text.
CBB_link_template = "\\textsc{\\href{https://data.isiscb.org/isis/citation/%s}{%s}}"
CBB_link_template_proof = "\\textsc{\\href{https://data.isiscb.org/isis/curation/citation/%s}{%s}}"

# Wraps a record's description text in the `isisdescription` environment.
latex_template_description = """
\\begin{isisdescription} %s\\end{isisdescription}
"""

In [None]:
# Separator placed between rendered author/editor names.
AUTHOR_SEPARATER = "; "
# Separator placed between the chapter entries listed under a book.
CHAPTER_SEPARATER = "; "

In [None]:
class DocumentError:
    """Record of a problem noticed while turning a document into LaTeX.

    Attributes:
        doc: the object the problem was found on.
        error: human-readable description of the problem.
        error_type: severity label; this notebook uses "WARNING" and "ERROR".
    """

    def __init__(self, doc, error, error_type):
        self.doc, self.error, self.error_type = doc, error, error_type
        

In [None]:
# Module-level list that collects DocumentError objects appended by the rendering code.
errors = []

In [None]:
def generate_link_latex(cbb_nr):
    """Return the LaTeX hyperlink for a citation id.

    The proof (curation) URL template is used while IS_PROOF is truthy, the public
    one otherwise. The id serves both as URL suffix and as the visible link text.
    """
    link_template = CBB_link_template_proof if IS_PROOF else CBB_link_template
    return link_template % (cbb_nr, cbb_nr)

In [None]:
def get_last_first(name):
    """Split a personal name into a ``(last, first)`` tuple.

    * ``"Last, First"``: split at the first comma. Both parts are returned
      exactly as they appear (not stripped), as before.
    * ``"First Last"``: the text after the last space is the last name, the
      text before it the first name; surrounding whitespace is ignored.
    * A single token (no comma, no space) is returned as the last name with an
      empty first name. Previously ``rfind`` returned -1 here, which split off
      the final character (``"Aristotle"`` became ``("e", "Aristotl")``).
    """
    if ',' in name:
        comma_at = name.index(',')
        return name[:comma_at], name[comma_at + 1:]

    # Strip first so a trailing space cannot be mistaken for the separator.
    cleaned = name.strip()
    split_at = cleaned.rfind(" ")
    if split_at == -1:
        return cleaned, ""
    return cleaned[split_at:].strip(), cleaned[:split_at].strip()

In [None]:
class Author:
    """A contributor (person or other authority) attached to a publication.

    Attributes:
        name: display name of the contributor.
        order: display-order value; may be a number or a numeric string.
        author_type: authority type; "Person" enables last/first name splitting.
    """

    def __init__(self):
        self.name = ''
        self.order = 0
        self.author_type = ""

    @staticmethod
    def _ascii_fold(text):
        """Lower-case ``text`` and drop everything that is not plain ASCII after NFD decomposition."""
        return unicodedata.normalize('NFD', text.lower()).encode('ascii', 'ignore').decode('utf8')

    def get_last_name(self):
        """Return the last name for persons, the full name for everything else."""
        if self.author_type == "Person":
            return get_last_first(self.name)[0]
        return self.name

    def get_first_name(self):
        """Return the first name for persons, an empty string for everything else."""
        if self.author_type == "Person":
            return get_last_first(self.name)[1]
        return ""

    def get_name_for_sort(self):
        """Return the ASCII-folded sort key ``"<last>, <first>"``."""
        return self._ascii_fold(self.get_last_name()) + ", " + self._ascii_fold(self.get_first_name())

    def get_order(self):
        """Return ``order`` as a float; print a notice and return 0 when it is not numeric."""
        try:
            return float(self.order)
        except (TypeError, ValueError):
            # Only conversion failures are expected here; a bare ``except`` would also
            # swallow KeyboardInterrupt and unrelated programming errors.
            print("Not a valid order value " + str(self.order))
            return 0

In [None]:
def build_persons_latex_list(persons):
    """Render every contributor in ``persons`` as a LaTeX name fragment.

    Contributors whose ``author_type`` is 'Person' are formatted as last name / first
    name; all others are formatted with the organization template using the full name.
    A falsy ``persons`` (``None`` or empty) yields an empty list.
    """
    def as_latex(person):
        if person.author_type != 'Person':
            return author_template_organization % (person.name)
        return author_template % get_last_first(person.name)

    return [as_latex(person) for person in (persons or [])]
  

In [None]:
def prepare_author_editor_string(author_editors_latex):
    """Join rendered names into one string that ends with a period and a space.

    Returns an empty string when there is nothing to join. A period is only
    appended when the joined text does not already end with one.
    """
    joined = AUTHOR_SEPARATER.join(author_editors_latex).strip()
    if not joined:
        return ""
    terminator = "" if joined.endswith(".") else "."
    return joined + terminator + " "

In [None]:
def prepare_title(title_string, append_period=True):
    """Normalise spaced hyphens in a title and optionally terminate it with a period.

    " - " becomes "--" and " -- " becomes "---". When ``append_period`` is true and the
    title does not already end in one of ``? ! : ; .`` a period is appended.
    """
    dashed = title_string.replace(" - ", "--").replace(" -- ", "---")
    if not append_period or dashed.endswith(("?", "!", ":", ";", ".")):
        return dashed
    return dashed + "."

In [None]:
def handle_special_chars(text_string):
    """Convert quotes, underscores, dashes and ellipses into their LaTeX spellings.

    The regular-expression rules run first and in order (opening single quote, opening
    double quote, closing double quote); the literal substitutions follow, also in order.
    """
    quote_rules = (
        (r"( |^)'([a-zA-Z0-9\"])", r"\1`\2"),
        (r"( |^)[\"“„]([a-zA-Z0-9\[\]`])", r"\1``\2"),
        (r"([a-zA-Z0-9\[\]\.\?!])[\"”“]([ \.:!\?\[\]]|$)", r"\1''\2"),
    )
    converted = text_string
    for pattern, replacement in quote_rules:
        converted = re.sub(pattern, replacement, converted)

    literal_rules = (
        ("’", "'"),
        # a double quote directly followed by a single quote gets a thin space between them
        ("\"'", "''\\hspace{0.4mm}'"),
        ("_", "\\_"),
        ("─", "---"),
        ("–", "---"),
        ("…", "\\dots"),
    )
    for old, new in literal_rules:
        converted = converted.replace(old, new)
    return converted

In [None]:
def handle_html_tags(text_string, doc, set_non_italic=False):
    """Translate ``<em>``, ``<i>`` and ``<sub>`` markup into LaTeX.

    Emphasis becomes ``\\textit{...}``, or ``{\\normalfont ...}`` when ``set_non_italic``
    is true. If any tag is left afterwards, a WARNING DocumentError naming the first
    leftover tag is appended to the shared ``errors`` list for ``doc``.
    """
    emphasis_replacement = r"{\\normalfont \1}" if set_non_italic else r"\\textit{\1}"
    rewrites = (
        (r"<em>(.+?)</em>", emphasis_replacement),
        (r"<i>(.+?)</i>", emphasis_replacement),
        (r"<sub>(.+?)</sub>", r"\\textsubscript{\1}"),
    )
    for pattern, replacement in rewrites:
        text_string = re.sub(pattern, replacement, text_string)

    leftover = re.search(r'<.*?>', text_string)
    if leftover is not None:
        errors.append(DocumentError(doc, "Could not handle tag: " + leftover.group(0), "WARNING"))
    return text_string

In [None]:
def add_space_before_quotes(text_string):
    """Prefix a thin horizontal space when the text starts with a backtick or a double quote."""
    if text_string.startswith(("`", "\"")):
        return "\\hspace{0.4mm}" + text_string
    return text_string

In [None]:
def get_edition_string(edition_string):
    """Return the edition text with a leading space and a trailing period.

    Surrounding whitespace is removed first; an empty edition stays empty.
    """
    trimmed = edition_string.strip()
    if not trimmed:
        return trimmed
    if not trimmed.endswith("."):
        trimmed += "."
    return " " + trimmed

In [None]:
def include_based_on_print_date(print_date):
    """Tell whether a record should be included, judged by its print date.

    Everything is included while INCLUDE_ALREADY_PRINTED is set; otherwise only
    records whose print date is blank.
    """
    return True if INCLUDE_ALREADY_PRINTED else print_date.strip() == ""

The following cells contain the publication classes and their latex templates.

In [None]:
class Document:
    """Base record holding the fields common to all publication types."""

    def __init__(self):
        # identification
        self.isiscb_id = ''
        self.index = 0
        self.category_number = ''
        self.database = ""
        self.print_date = ''
        # bibliographic data
        self.title = ''
        self.year = 0
        self.edition = ""
        self.publisher = ''
        self.description = ""
        # list of subjects attached to the record
        self.subjects = []

In [None]:
class Subject:
    """A subject entry; every field starts out as an empty string."""

    def __init__(self):
        self.name = self.id = self.display_name = ''
        self.authority_type = self.acr_type = ''

In [None]:
# Full entry for a book; filled by Book.render with a dict of named fields.
latex_template_book = """
 
 
\\noindent\\begin{footnotesize}\\textbf{%(index)s}\\hspace{0.5em} %(authors)s\\textit{%(title)s}%(edition)s%(editors)s %(publisher)s, %(year)s. %(isbn)s %(link)s.
%(desc)s
%(chapters)s
%(reviews)s
\\vspace{2ex} \\end{footnotesize}
\\paragraph{}

"""

# Wrapper around the joined chapter items of a book.
latex_template_book_chapters = """
\\begin{isisdescription} Includes:  %s.\\end{isisdescription}
"""

# One chapter inside a book entry: (authors, title, pages, reference).
latex_template_book_chapter_item = """
%s ``\\selectlanguage{french}%s\\selectlanguage{english}''%s%s
"""

chapter_ref=" [ref.~%s]"
# NOTE(review): editors_template is not referenced in this part of the notebook.
editors_template = ", %s (Eds.)"
# NOTE(review): pages_template is assigned again (as ": %s") in the article and essay-review
# template cells further down; after a top-to-bottom run Book.render sees that later value,
# not this one — confirm which format is intended for chapter pages.
pages_template = " %s"

# Wrapper for the reference to a book's reviews entry.
reviews_template = """
\\begin{isisdescription} Reviews: %s\\end{isisdescription}
"""

class Book(Document):
    """A book record, optionally carrying chapters, rendered with ``latex_template_book``."""

    def __init__(self):
        Document.__init__(self)
        self.authors = []
        self.editors = []
        self.title = ''
        self.year = 0
        self.isbn = ''
        self.chapters = []
        self.chapter_ids = []

    def get_contributors_to_index(self):
        """Return authors followed by editors."""
        return self.authors + self.editors

    def get_sort_by_value(self):
        """Return the joined sort keys of the authors, or of the editors when there are no authors."""
        contributors = self.authors if self.authors else self.editors
        return "; ".join([person.get_name_for_sort() for person in contributors])

    def get_author_string(self):
        """Return the rendered authors joined with ", " (empty string without authors)."""
        if not self.authors:
            return ""
        return ", ".join(build_persons_latex_list(self.authors))

    def get_editor_string(self, append_eds=True):
        """Return the rendered editors, optionally followed by " (Ed.) " / " (Eds.) ".

        Editors for which no last or no first name can be derived are left out.
        """
        rendered = []
        for editor in (self.editors or []):
            last, first = get_last_first(editor.name)
            if last and first:
                rendered.append(author_template % (last, first))

        suffix = ""
        if rendered and append_eds:
            suffix = " (Ed.) " if len(rendered) == 1 else " (Eds.) "
        return prepare_author_editor_string(rendered) + suffix

    def render(self, counter, reviews=None):
        """Return the LaTeX entry for this book.

        ``counter`` is printed as the entry number. When ``reviews`` is given, its
        ``index`` is referenced in a "Reviews" note.
        """
        authors_latex = build_persons_latex_list(self.authors)
        editors_latex = build_persons_latex_list(self.editors)

        # Authors lead the entry when present and editors then follow the title;
        # without authors the editors take the leading position.
        leading_names = ""
        editors_after_title = ""
        if editors_latex:
            eds_suffix = " (Ed.) " if len(editors_latex) == 1 else " (Eds.) "
            editors_text = prepare_author_editor_string(editors_latex) + eds_suffix
            if authors_latex:
                editors_after_title = ", " + editors_text
            else:
                leading_names = editors_text
        if authors_latex:
            leading_names = prepare_author_editor_string(authors_latex)

        isbn_text = isbn_template % (self.isbn) if self.isbn else ''

        # render chapters
        chapters_text = ""
        if self.chapters:
            chapter_items = []
            for chapter in self.chapters:
                chapter_names = build_persons_latex_list(chapter.authors)
                chapter_pages = pages_template % (chapter.pages) if chapter.pages else ''
                chapter_reference = chapter_ref % (chapter.index) if chapter.index else ""
                item = latex_template_book_chapter_item % (
                    prepare_author_editor_string(chapter_names),
                    handle_html_tags(handle_special_chars(chapter.title), chapter),
                    chapter_pages,
                    chapter_reference,
                )
                chapter_items.append(item.strip())
            if chapter_items:
                chapters_text = latex_template_book_chapters % (CHAPTER_SEPARATER.join(chapter_items))

        reviews_text = ""
        if reviews:
            reviews_text = reviews_template % ("[" + str(reviews.index) + "]")

        # The edition is printed without its trailing period here.
        edition_text = get_edition_string(self.edition)
        if edition_text.endswith("."):
            edition_text = edition_text[:-1]

        title_text = handle_html_tags(handle_special_chars(self.title), self, set_non_italic=True)
        # The title is only terminated when an edition follows or no editors follow it.
        if edition_text or not editors_after_title:
            title_text = prepare_title(title_text)

        fields = {
            "index": counter,
            "authors": leading_names,
            "title": title_text,
            "edition": edition_text,
            "editors": editors_after_title,
            "publisher": self.publisher,
            "year": self.year,
            "isbn": isbn_text,
            "link": generate_link_latex(self.isiscb_id),
            "desc": latex_template_description % (self.description) if self.description else "",
            "chapters": chapters_text,
            "reviews": reviews_text,
        }
        return latex_template_book % fields
                                    


In [None]:
# Full entry for a journal article; filled by Article.render with a dict of named fields.
latex_template_article = """
 
 
\\noindent\\begin{footnotesize}\\textbf{%(counter)s}\\hspace{0.5em} %(author)s``%(title)s''%(edition)s \\textit{%(journal)s}%(volume)s%(issue)s (%(year)s)%(pages)s.%(publisher)s %(link)s.

%(description)s

\\vspace{2ex} \\end{footnotesize}
\\paragraph{}

"""
latex_template_article_publisher = " %s."

volume_template = " %s"
issue_template = ", no. %s"
# NOTE(review): this rebinds the module-level pages_template that the book template cell
# set to " %s"; every renderer that reads pages_template afterwards gets ": %s".
pages_template = ": %s"
class Article(Document):
    """A journal article rendered with ``latex_template_article``."""

    def __init__(self):
        Document.__init__(self)
        self.authors = []
        self.title = ''
        self.year = 0
        self.journal = ''
        self.journal_abbr = ''
        self.isbn = ''
        self.volume = ''
        self.issue = ''
        self.pages = ''

    def get_contributors_to_index(self):
        """Return the authors."""
        return self.authors

    def get_sort_by_value(self):
        """Return the authors' sort keys joined with "; "."""
        sort_keys = [person.get_name_for_sort() for person in self.authors]
        return "; ".join(sort_keys)

    def render(self, counter, reviews=None):
        """Return the LaTeX entry for this article; ``counter`` is the entry number.

        The journal abbreviation is preferred over the full journal title. An ERROR
        is recorded in ``errors`` when neither is available.
        """
        # Every author is formatted as "last, first", regardless of author type.
        author_entries = [author_template % get_last_first(person.name) for person in self.authors]

        volume_part = volume_template % (self.volume) if self.volume else ''
        issue_part = issue_template % (self.issue) if self.issue else ''
        pages_part = pages_template % (self.pages) if self.pages else ''

        journal_text = self.journal_abbr if self.journal_abbr.strip() else self.journal
        if not journal_text:
            errors.append(DocumentError(self, "No Journal abbreviation or title found.", "ERROR"))

        fields = {
            "counter": counter,
            "author": prepare_author_editor_string(author_entries),
            "title": handle_html_tags(add_space_before_quotes(prepare_title(handle_special_chars(self.title))), self),
            "edition": get_edition_string(self.edition),
            "journal": journal_text,
            "volume": volume_part,
            "issue": issue_part,
            "year": self.year,
            "pages": pages_part,
            "publisher": latex_template_article_publisher % (self.publisher) if self.publisher else "",
            "link": generate_link_latex(self.isiscb_id),
            "description": latex_template_description % (self.description) if self.description else "",
        }
        return latex_template_article % fields


In [None]:
# Full entry for a book chapter; filled by Chapter.render with a dict of named fields.
latex_template_chapter = """
 
\\noindent\\begin{footnotesize}\\textbf{%(counter)s}\\hspace{0.5em} %(author)s``%(title)s''%(edition)s In \\textit{%(book)s}%(book_ref)s%(book_editor)s%(publisher)s (%(year)s)%(pages)s. %(link)s.

%(description)s

\\vspace{2ex} \\end{footnotesize}
\\paragraph{}

"""
# Fragments inserted into the chapter entry; each takes one value.
latex_template_book_ref = " [ref. %s]"
latex_template_book_editors = ", edited by %s"
latex_template_book_authors = " by %s"
latex_template_book_pages = ", %s"
latex_template_chapter_publisher = " %s"

class Chapter(Document):
    """A book chapter; ``book`` must reference the containing Book before rendering."""

    def __init__(self):
        Document.__init__(self)
        self.authors = []
        self.editors = []
        self.title = ''
        self.year = 0
        self.book = None
        self.pages = ''

    def get_contributors_to_index(self):
        """Return authors followed by editors."""
        return self.authors + self.editors

    def get_sort_by_value(self):
        """Return the authors' sort keys joined with "; "."""
        return "; ".join([a.get_name_for_sort() for a in self.authors])

    def get_editor_string(self):
        """Return the chapter's own editors followed by " (Ed.) " / " (Eds.) ".

        Editors for which no last or no first name can be derived are left out.
        """
        editors_latex = []
        if self.editors:
            for e in self.editors:
                last, first = get_last_first(e.name)
                if last and first:
                    editors_latex.append(author_template % (last, first))
        eds_suffix = ""
        if editors_latex:
            eds_suffix = " (Ed.) " if len(editors_latex) == 1 else " (Eds.) "

        return prepare_author_editor_string(editors_latex) + eds_suffix

    def render(self, counter, reviews=None):
        """Return the LaTeX entry for this chapter; ``counter`` is the entry number.

        The containing book is credited as "edited by ..." when it has editors but no
        authors, as "by ..." when it has authors, and without any credit when it has
        neither (the previous code emitted a dangling " by " in that case).
        """
        # Shared helper instead of a private copy of the name-splitting logic.
        authors_latex = [author_template % get_last_first(a.name) for a in self.authors]

        book_ref = ""
        if self.book.index > 0:
            book_ref = latex_template_book_ref % (self.book.index)

        book_authors = self.book.get_author_string()
        # Prefer the book's editors; fall back to the editors recorded on the chapter.
        book_editors = self.book.get_editor_string(append_eds=False) if self.book.get_editor_string() else self.get_editor_string()
        if book_editors and not book_authors:
            book_editors_or_authors_string = latex_template_book_editors % (book_editors)
        elif book_authors:
            book_editors_or_authors_string = latex_template_book_authors % (book_authors)
        else:
            book_editors_or_authors_string = ""

        pages_string = ""
        if self.pages:
            pages_string = latex_template_book_pages % (self.pages)
        return latex_template_chapter % {
            "counter": counter,
            "author": prepare_author_editor_string(authors_latex),
            "title": handle_html_tags(prepare_title(handle_special_chars(self.title)), self),
            "edition": get_edition_string(self.edition),
            # The book title gets the same special-character escaping as every other
            # rendered title, so quotes and underscores cannot reach LaTeX unescaped.
            "book": handle_html_tags(handle_special_chars(self.book.title), self.book, set_non_italic=True),
            "book_ref": book_ref,
            "book_editor": book_editors_or_authors_string,
            "year": self.year,
            "pages": pages_string,
            "publisher": latex_template_chapter_publisher % (self.book.publisher) if self.book.publisher else "",
            "link": generate_link_latex(self.isiscb_id),
            "description": latex_template_description % (self.description) if self.description else ""
        }


In [None]:
# Full entry for a thesis; filled by Thesis.render with a dict of named fields.
latex_template_thesis = """
 
\\noindent\\begin{footnotesize}\\textbf{%(counter)s}\\hspace{0.5em} %(author)s``%(title)s''%(edition)s Dissertation at %(school)s  (%(year)s).%(publisher)s%(advisor)s %(link)s.

%(description)s

\\vspace{2ex} \\end{footnotesize}
\\paragraph{}

"""

# Optional fragments; each takes one value and is only used when that value is present.
latex_template_thesis_advisor = " Advisor(s): %s."
latex_template_thesis_publisher = " %s."

class Thesis(Document):
    """A dissertation rendered with ``latex_template_thesis``."""

    def __init__(self):
        Document.__init__(self)
        self.authors = []
        self.isbn = ''
        self.school = ''
        self.advisors = []

    def get_contributors_to_index(self):
        """Return the authors."""
        return self.authors

    def get_sort_by_value(self):
        """Return the authors' sort keys joined with "; "."""
        sort_keys = [person.get_name_for_sort() for person in self.authors]
        return "; ".join(sort_keys)

    def render(self, counter, reviews=None):
        """Return the LaTeX entry for this thesis; ``counter`` is the entry number."""
        author_entries = [author_template % get_last_first(person.name) for person in self.authors]

        # Advisors are formatted like authors and joined with ", ".
        advisor_text = ""
        if self.advisors:
            advisor_entries = [author_template % get_last_first(person.name) for person in self.advisors]
            advisor_text = latex_template_thesis_advisor % (", ".join(advisor_entries))

        fields = {
            "counter": counter,
            "author": prepare_author_editor_string(author_entries),
            "title": handle_html_tags(add_space_before_quotes(prepare_title(handle_special_chars(self.title))), self),
            "edition": get_edition_string(self.edition),
            "school": self.school,
            "year": self.year,
            "advisor": advisor_text,
            "link": generate_link_latex(self.isiscb_id),
            "publisher": latex_template_thesis_publisher % (self.publisher) if self.publisher else "",
            "description": latex_template_description % (self.description) if self.description else "",
        }
        return latex_template_thesis % fields

    

In [None]:
# Full entry for a media item. Positional slots, in order:
# counter, authors, title, edition, publisher, year, link, description.
latex_template_media = """

\\noindent\\begin{footnotesize}\\textbf{%s}\\hspace{0.5em} %s``%s''%s%s (%s). %s.

%s

\\vspace{2ex} \\end{footnotesize}
\\paragraph{}

"""
latex_template_media_publisher = " %s."

class Media(Document):
    """A media item rendered with the positional ``latex_template_media``."""

    def __init__(self):
        Document.__init__(self)
        self.authors = []
        # Unlike the base class default (''), the publisher starts out as None here.
        self.publisher = None

    def get_contributors_to_index(self):
        """Return the authors."""
        return self.authors

    def get_sort_by_value(self):
        """Return the authors' sort keys joined with "; "."""
        sort_keys = [person.get_name_for_sort() for person in self.authors]
        return "; ".join(sort_keys)

    def render(self, counter, reviews=None):
        """Return the LaTeX entry for this media item; ``counter`` is the entry number."""
        author_entries = [author_template % get_last_first(person.name) for person in self.authors]
        publisher_part = latex_template_media_publisher % (self.publisher) if self.publisher else ""
        title_part = handle_html_tags(add_space_before_quotes(prepare_title(handle_special_chars(self.title))), self)
        description_part = latex_template_description % (self.description) if self.description else ""

        # Slot order must match the positional placeholders of latex_template_media.
        slots = (
            counter,
            prepare_author_editor_string(author_entries),
            title_part,
            get_edition_string(self.edition),
            publisher_part,
            self.year,
            generate_link_latex(self.isiscb_id),
            description_part,
        )
        return latex_template_media % slots
    

In [None]:
# Full entry for an essay review; filled by EssayReview.render with a dict of named fields.
latex_template_essay_review = """

\\noindent\\begin{footnotesize}\\textbf{%(counter)s}\\hspace{0.5em} %(author)s``%(title)s''%(edition)s \\textit{%(journal)s} %(volume)s%(issue)s (%(year)s)%(pages)s.%(publisher)s %(link)s.

%(description)s

\\begin{isisdescription}Essay review of %(book)s\\end{isisdescription}

\\vspace{2ex} \\end{footnotesize}
\\paragraph{}

"""
latex_template_essay_review_publisher = " %s."

# NOTE(review): the next three names repeat, with identical values, the assignments made in
# the article template cell.
volume_template = " %s"
issue_template = ", no. %s"
pages_template = ": %s"
ref_template = " [ref. %s]"
# One reviewed work: (authors/editors, title, year, reference).
essay_review_entry_template = "%s \\emph{%s}  (%s)%s"

class EssayReview(Document):
    """An essay review of one or more works, rendered with ``latex_template_essay_review``."""

    def __init__(self):
        Document.__init__(self)
        self.authors = []
        self.journal = ''
        self.journal_abbr = ''
        self.volume = ''
        self.issue = ''
        self.pages = ''
        self.books = []
        self.reviewed_book_id = ''
        self.active = True

    def get_contributors_to_index(self):
        """Return the authors."""
        return self.authors

    def get_sort_by_value(self):
        """Return the authors' sort keys joined with "; "."""
        sort_keys = [person.get_name_for_sort() for person in self.authors]
        return "; ".join(sort_keys)

    def render(self, counter, reviews=None):
        """Return the LaTeX entry for this essay review; ``counter`` is the entry number.

        Each reviewed work in ``books`` is listed with its authors (or, lacking authors,
        its editors), title, year and reference number. An ERROR is recorded in
        ``errors`` when neither a journal abbreviation nor a journal title is available.
        """
        author_entries = [author_template % get_last_first(person.name) for person in self.authors]

        volume_part = volume_template % (self.volume) if self.volume else ''
        issue_part = issue_template % (self.issue) if self.issue else ''
        pages_part = pages_template % (self.pages) if self.pages else ''

        reviewed_entries = []
        for work in self.books:
            work_authors = build_persons_latex_list(work.authors)
            # Editors are only looked up on Book instances.
            work_editors = build_persons_latex_list(work.editors) if type(work) == Book else ""

            credited = ""
            if work_authors:
                credited = prepare_author_editor_string(work_authors)
            elif work_editors:
                eds_suffix = " (Ed.) " if len(work_editors) == 1 else " (Eds.) "
                credited = prepare_author_editor_string(work_editors) + eds_suffix

            reference = ref_template % (work.index) if work.index else ""
            work_title = handle_html_tags(add_space_before_quotes(handle_special_chars(work.title)), work, set_non_italic=True)
            reviewed_entries.append(essay_review_entry_template % (credited, work_title, work.year, reference))

        journal_text = self.journal_abbr if self.journal_abbr.strip() else self.journal
        if not journal_text:
            errors.append(DocumentError(self, "No Journal abbreviation or title found.", "ERROR"))

        fields = {
            "counter": counter,
            "author": prepare_author_editor_string(author_entries),
            "title": handle_html_tags(add_space_before_quotes(prepare_title(handle_special_chars(self.title))), self),
            "edition": get_edition_string(self.edition),
            "journal": journal_text,
            "volume": volume_part,
            "issue": issue_part,
            "year": self.year,
            "pages": pages_part,
            "publisher": latex_template_essay_review_publisher % (self.publisher) if self.publisher else "",
            "link": generate_link_latex(self.isiscb_id),
            "description": latex_template_description % (self.description) if self.description else "",
            "book": "; ".join(reviewed_entries),
        }
        return latex_template_essay_review % fields

    

In [None]:
# Entry for a reviewed book together with its reviews; filled by BookReviews.render.
latex_template_book_review="""
\\noindent\\begin{footnotesize}\\textbf{%(index)s}\\hspace{0.5em}%(authors)s \\textit{%(title)s}. %(year)s.%(ref)s %(link)s

%(desc)s

\\begin{isisdescription}%(reviews)s \\end{isisdescription}

 \\end{footnotesize}

\\vspace{0.75ex}

"""

# One review inside the entry above.
latex_book_review = "%(author)s \\textit{%(journal)s}  %(vol)s (%(year)s)%(pages)s"
book_review_ref_template = " [ref. %s]"
# NOTE(review): template_book_review_publisher is not referenced in this part of the notebook.
template_book_review_publisher = " %s."

class BookReview(Document):
    """A single review of a book, published in a journal."""

    def __init__(self):
        Document.__init__(self)
        self.authors = []
        # journal the review appeared in
        self.journal = self.journal_abbr = ''
        self.volume = self.issue = self.pages = ''
        # id of the book being reviewed
        self.reviewed_book_id = ''
        self.active = True

    def get_contributors_to_index(self):
        """Return the authors of the review."""
        return self.authors
        
class BookReviews():
    """A reviewed book together with all of its reviews.

    Attributes:
        book: the reviewed work (must be set before sorting or rendering).
        reviews: the review objects collected for that work.
        edition, description: optional extra text; ``description`` is printed when set.
    """

    def __init__(self):
        self.book = None
        self.reviews = []
        self.edition = ""
        self.description = ""

    def get_sort_by_value(self):
        """Return the sort key of the reviewed book."""
        return self.book.get_sort_by_value()

    def get_editor_string(self):
        """Return the book's editors followed by " (Ed.) " / " (Eds.) ".

        Editors for which no last or no first name can be derived are left out.
        """
        editors_latex = []
        if self.book.editors:
            for e in self.book.editors:
                last, first = get_last_first(e.name)
                if last and first:
                    editors_latex.append(author_template % (last, first))
        eds_suffix = ""
        if editors_latex:
            eds_suffix = " (Ed.) " if len(editors_latex) == 1 else " (Eds.) "

        return prepare_author_editor_string(editors_latex) + eds_suffix

    def render(self, counter, reviews=None):
        """Return the LaTeX entry listing the reviewed book and its reviews.

        ``counter`` is the entry number. The book is credited to its authors or,
        lacking authors, to its editors (editors are only looked up on Book
        instances). The link is only emitted while IS_PROOF is set. An ERROR is
        recorded in ``errors`` for each review that has neither a journal
        abbreviation nor a journal title.

        The author/editor strings used to be computed twice, the first set never
        being used; they are now computed once.
        """
        review_latexes = []
        for review in self.reviews:
            # Shared helper instead of a private copy of the name-splitting logic.
            review_authors_latex = [author_template % get_last_first(a.name) for a in review.authors]

            vol = volume_template % (review.volume) if review.volume else ''
            pages = pages_template % (review.pages) if review.pages else ''

            journal_text = review.journal_abbr if review.journal_abbr.strip() else review.journal
            if not journal_text:
                errors.append(DocumentError(review, "No Journal abbreviation or title found.", "ERROR"))

            review_latexes.append(latex_book_review % {
                "author": prepare_author_editor_string(review_authors_latex),
                "journal": journal_text,
                "vol": vol,
                "year": review.year,
                "pages": pages,
            })

        book_authors_latex = build_persons_latex_list(self.book.authors)
        book_editors_latex = build_persons_latex_list(self.book.editors) if type(self.book) == Book else ""

        book_author_editor_string = ""
        if book_authors_latex:
            book_author_editor_string = prepare_author_editor_string(book_authors_latex)
        elif book_editors_latex:
            eds_suffix = " (Ed.) " if len(book_editors_latex) == 1 else " (Eds.) "
            book_author_editor_string = prepare_author_editor_string(book_editors_latex) + eds_suffix

        book_ref_info = ""
        if self.book.index:
            book_ref_info = book_review_ref_template % (self.book.index)

        return latex_template_book_review % {
            "index": counter,
            "authors": book_author_editor_string,
            "title": handle_html_tags(handle_special_chars(self.book.title), self.book, set_non_italic=True),
            "year": self.book.year,
            "ref": book_ref_info,
            "link": generate_link_latex(self.book.isiscb_id) if IS_PROOF else "",
            "desc": latex_template_description % (self.description) if self.description else "",
            "reviews": "; ".join(review_latexes)}


The following cells contain functions to create publication objects.

In [None]:
def create_author(author_string):
    """Build an Author from one '||'-delimited authority record.

    Recognised fields are 'AuthorityName ', 'ACRDisplayOrder ' and
    'AuthorityType '; every other field is ignored.

    Returns None when the string contains no '||' separator at all,
    otherwise the populated Author.
    """
    name_key = 'AuthorityName '
    order_key = 'ACRDisplayOrder '
    type_key = 'AuthorityType '

    person = Author()
    fields = author_string.split('||')
    if len(fields) == 1:
        return None

    for raw_field in fields:
        field = raw_field.strip()
        if field.startswith(name_key):
            person.name = handle_special_chars(field[len(name_key):].strip())
        elif field.startswith(order_key):
            # kept as the raw string taken from the export
            person.order = field[len(order_key):].strip()
        elif field.startswith(type_key):
            # stored exactly as it follows the prefix (no extra strip)
            person.author_type = field[len(type_key):]
    return person

In [None]:
def get_publisher(publisher_string):
    """Return the name of the first authority whose ACRType is 'Publisher'.

    publisher_string holds authority records separated by '//', each made of
    '||'-separated fields. Ampersands in the returned name are escaped for
    LaTeX. Returns an empty string when the input is empty or no record is
    typed as a publisher.
    """
    if not publisher_string:
        return ""

    name_key, type_key = 'AuthorityName ', 'ACRType '
    for record in publisher_string.split('//'):
        name = ''
        kind = ''
        for field in (chunk.strip() for chunk in record.split('||')):
            if field.startswith(name_key):
                name = field[len(name_key):].strip().replace('&', '\\&')
            if field.startswith(type_key):
                kind = field[len(type_key):].strip()

        if kind == 'Publisher':
            return name

    return ''

In [None]:
def parse_journal(row, doc):
    """Fill doc.journal and doc.journal_abbr from the 'Journal Link' column.

    When the cell has no ' || ' separator, the whole stripped cell is stored
    as doc.journal and nothing else is touched. Otherwise the
    'AuthorityName ' field becomes doc.journal and the 'Abbreviation ' field
    becomes doc.journal_abbr, both with ampersands escaped for LaTeX.
    """
    link = row['Journal Link']
    fields = link.split(" || ")
    if len(fields) <= 1:
        doc.journal = link.strip()
        return

    targets = (('AuthorityName ', 'journal'), ('Abbreviation ', 'journal_abbr'))
    for raw_field in fields:
        field = raw_field.strip()
        for prefix, attribute in targets:
            if field.startswith(prefix):
                setattr(doc, attribute, field[len(prefix):].strip().replace('&', '\\&'))
    

In [None]:
def parse_subjects(row, doc):
    """Append the subjects listed in the 'Subjects' column to doc.subjects.

    The cell holds records separated by ' // ', each made of ' || '-separated
    fields. A structured record is appended only when its ACRType is
    "Subject". As soon as a record without any ' || ' separator is met, the
    whole stripped cell is used as a single subject name and parsing stops.

    An empty cell no longer produces a Subject with a blank name; such a
    blank subject would otherwise end up as a nameless entry in the subject
    index.
    """
    subjects = row['Subjects'].split(" // ")
    for subject in subjects:
        parts = subject.split(" || ")
        subject = Subject()
        if len(parts) <= 1:
            # unstructured cell: fall back to the raw text, but skip blanks
            fallback_name = row['Subjects'].strip()
            if fallback_name:
                subject.name = fallback_name
                doc.subjects.append(subject)
            return
        for part in parts:
            part = part.strip()
            if part.startswith('AuthorityName '):
                subject.name = part[len('AuthorityName '):].strip().replace('&', '\\&')
            if part.startswith('AuthorityType '):
                subject.authority_type = part[len('AuthorityType '):].strip().replace('&', '\\&')
            if part.startswith('AuthorityID '):
                subject.id = part[len('AuthorityID '):].strip().replace('&', '\\&')
            if part.startswith('ACRNameForDisplayInCitation '):
                subject.display_name = part[len('ACRNameForDisplayInCitation '):].strip().replace('&', '\\&')
            if part.startswith('ACRType '):
                subject.acr_type = part[len('ACRType '):].strip().replace('&', '\\&')
        # only relations of type "Subject" are kept
        if subject.acr_type == "Subject":
            doc.subjects.append(subject)

In [None]:
def parse_pages(row):
    """Return the page text from the 'Pages Free Text' column for LaTeX.

    Only the text before the first "(From" marker is kept (stripped); a
    non-empty value without that marker yields "". Ampersands are escaped
    for LaTeX. Empty or None values are returned unchanged.
    """
    pages = row['Pages Free Text']
    if not pages:
        return pages
    marker = "(From"
    pages = pages[:pages.index(marker)].strip() if marker in pages else ""
    # "\\&" is backslash + ampersand, spelled without the invalid "\&" escape
    return pages.replace("&", "\\&")

In [None]:
def parse_volume(row):
    """Return the volume text from the 'Journal Volume' column for LaTeX.

    Only the text before the first "(From" marker is kept (stripped); a
    non-empty value without that marker yields "". Ampersands are escaped
    for LaTeX. Empty or None values are returned unchanged.
    """
    vol = row['Journal Volume']
    if not vol:
        return vol
    marker = "(From"
    vol = vol[:vol.index(marker)].strip() if marker in vol else ""
    # "\\&" is backslash + ampersand, spelled without the invalid "\&" escape
    return vol.replace("&", "\\&")

In [None]:
def parse_issue(row):
    """Return the issue text from the 'Journal Issue' column for LaTeX.

    Only the text before the first "(From" marker is kept (stripped); a
    non-empty value without that marker yields "". Ampersands are escaped
    for LaTeX. Empty or None values are returned unchanged.
    """
    issue = row['Journal Issue']
    if not issue:
        return issue
    marker = "(From"
    issue = issue[:issue.index(marker)].strip() if marker in issue else ""
    # "\\&" is backslash + ampersand, spelled without the invalid "\&" escape
    return issue.replace("&", "\\&")

In [None]:
def build_book(row):
    """Create a Book from one row of the export.

    Authors come from the 'Author' column and are sorted with
    Author.get_order(); editors come from the 'Editor' column. Title
    (ampersands escaped for LaTeX), year, publisher and ISBN are copied from
    the row. The ids of related citations whose CCRType is
    "Includes Chapter" are collected in book.chapter_ids.
    """
    book = Book()
    book.isiscb_id = row['Record ID']

    for author_string in row['Author'].split('//'):
        if author_string.strip():
            author = create_author(author_string)
            if author:
                book.authors.append(author)
    book.authors.sort(key=lambda author: author.get_order())

    for editor_string in row['Editor'].split('//'):
        editor = create_author(editor_string)
        if editor:
            book.editors.append(editor)

    book.title = row['Title'].replace('&', '\\&')
    book.year = row['Year of publication']
    book.publisher = get_publisher(row['Related Authorities'])
    book.isbn = row['ISBN']

    for related in row['Related Citations'].split('//'):
        if not related.strip():
            continue
        citation_id = ''
        is_chapter = False
        for field in related.split("||"):
            field = field.strip()
            if field.startswith("CitationID "):
                citation_id = field[len("CitationID "):].strip()
            # The prefix is matched with ONE trailing space, like every other
            # builder. The previous two-space prefix never matched
            # "CCRType Includes Chapter", so chapter ids were never collected.
            if field.startswith("CCRType "):
                if field[len("CCRType "):].strip() == "Includes Chapter":
                    is_chapter = True

        if is_chapter and citation_id:
            book.chapter_ids.append(citation_id)

    return book

In [None]:
def build_chapter(row):
    """Create a Chapter from one row of the export.

    Authors and editors are parsed from their columns, the title gets its
    ampersands escaped for LaTeX, and the pages come from parse_pages. The
    first related citation that is not explicitly typed as something other
    than "Includes Chapter" provides a placeholder Book (title and id only)
    stored in chapter.book.
    """
    chapter = Chapter()
    chapter.isiscb_id = row['Record ID']

    for author_string in row['Author'].split('//'):
        author = create_author(author_string)
        if author:
            chapter.authors.append(author)
    # NOTE(review): this sorts on the raw `order` attribute, while build_book
    # sorts with Author.get_order() - confirm both give the same ordering.
    chapter.authors.sort(key=lambda person: person.order)

    chapter.title = row['Title'].replace('&', '\\&')
    chapter.year = row['Year of publication']

    for editor_string in row['Editor'].split('//'):
        editor = create_author(editor_string)
        if editor:
            chapter.editors.append(editor)

    for citation in row['Related Citations'].split("//"):
        title = ""
        citation_id = ""
        # a citation counts as the containing book unless its type says otherwise
        refers_to_book = True
        for field in (piece.strip() for piece in citation.split("||")):
            if field.startswith("CCRType ") and field[len("CCRType "):].strip() != "Includes Chapter":
                refers_to_book = False
            if field.startswith("CitationTitle "):
                title = field[len("CitationTitle "):]
            if field.startswith("CitationID "):
                citation_id = field[len("CitationID "):].strip()

        if refers_to_book:
            chapter.book = Book()
            chapter.book.title = title
            chapter.book.isiscb_id = citation_id
            break

    chapter.pages = parse_pages(row)

    return chapter

In [None]:
def build_article(row):
    """Create an Article from one row of the export.

    Copies the record id, parses the authors, escapes ampersands in the
    title for LaTeX and fills year, publisher, journal, volume, issue and
    pages from the corresponding columns.
    """
    article = Article()
    article.isiscb_id = row['Record ID']

    parsed_authors = (create_author(text) for text in row['Author'].split('//'))
    for author in parsed_authors:
        if author:
            article.authors.append(author)

    article.title = row['Title'].replace('&', '\\&')
    article.year = row['Year of publication']
    article.publisher = get_publisher(row['Related Authorities'])
    parse_journal(row, article)

    article.volume = parse_volume(row)
    article.issue = parse_issue(row)
    article.pages = parse_pages(row)
    return article

In [None]:
def build_thesis(row):
    """Create a Thesis from one row of the export.

    Authors and advisors are parsed from the 'Author' and 'Advisor' columns,
    the title gets its ampersands escaped for LaTeX, and year and publisher
    are copied from the row. The school name is the 'AuthorityName' field of
    the 'School' column.
    """
    thesis = Thesis()
    thesis.isiscb_id = row['Record ID']

    for author_string in row['Author'].split('//'):
        author = create_author(author_string)
        if author:
            thesis.authors.append(author)

    for advisor_string in row['Advisor'].split('//'):
        advisor = create_author(advisor_string)
        if advisor:
            thesis.advisors.append(advisor)

    thesis.title = row['Title'].replace('&', '\\&')
    thesis.year = row['Year of publication']

    thesis.publisher = get_publisher(row['Related Authorities'])

    for part in row['School'].split('||'):
        # Match AND slice the stripped field. Slicing the unstripped text
        # (as before) cut into the name whenever the field carried more than
        # one leading whitespace character.
        field = part.strip()
        if field.startswith('AuthorityName'):
            thesis.school = field[len('AuthorityName'):].strip()

    return thesis

In [None]:
def build_media(row):
    """Create a Media object (multimedia record) from one row of the export.

    Copies the record id, parses the authors, escapes ampersands in the
    title for LaTeX and fills year and publisher.
    """
    media = Media()
    media.isiscb_id = row['Record ID']

    parsed_authors = (create_author(text) for text in row['Author'].split('//'))
    for author in parsed_authors:
        if author:
            media.authors.append(author)

    media.title = row['Title'].replace('&', '\\&')
    media.year = row['Year of publication']
    media.publisher = get_publisher(row['Related Authorities'])

    return media

In [None]:
def build_essay_review(row):
    """Create an EssayReview from one row of the export.

    Besides the usual bibliographic fields (authors, title, year, journal,
    volume, issue, pages), every related citation that is not explicitly
    typed as something other than "Is Reviewed By" adds a placeholder Book
    (title and id only) to essay_review.books.
    """
    essay_review = EssayReview()
    essay_review.isiscb_id = row['Record ID']

    for author_string in row['Author'].split('//'):
        author = create_author(author_string)
        if author:
            essay_review.authors.append(author)

    essay_review.title = row['Title'].replace('&', '\\&')
    essay_review.year = row['Year of publication']
    # NOTE(review): the other builders pass row['Related Authorities'] to
    # get_publisher; confirm that 'Place Publisher' is the intended column.
    essay_review.publisher = get_publisher(row['Place Publisher'])
    parse_journal(row, essay_review)

    essay_review.volume = parse_volume(row)
    essay_review.issue = parse_issue(row)
    essay_review.pages = parse_pages(row)

    for citation in row['Related Citations'].split("//"):
        title = ""
        citation_id = ""
        # a citation counts as a reviewed book unless its type says otherwise
        is_reviewed_book = True
        for field in (piece.strip() for piece in citation.split("||")):
            if field.startswith("CCRType ") and field[len("CCRType "):].strip() != "Is Reviewed By":
                is_reviewed_book = False
            if field.startswith("CitationTitle "):
                title = field[len("CitationTitle "):]
            if field.startswith("CitationID "):
                citation_id = field[len("CitationID "):].strip()

        if is_reviewed_book:
            reviewed = Book()
            reviewed.title = title
            reviewed.isiscb_id = citation_id
            essay_review.books.append(reviewed)

    return essay_review

In [None]:
def build_book_review(row):
    """Create a BookReview from one row of the export.

    Fills authors, year, journal, volume, issue and pages. The review is
    marked inactive when 'Record Nature' does not start with 'Active'. The
    id of the first related citation that is not explicitly typed as
    something other than "Is Reviewed By" is stored in reviewed_book_id.
    """
    book_review = BookReview()
    book_review.isiscb_id = row['Record ID']

    for author_string in row['Author'].split('//'):
        if not author_string.strip():
            continue
        author = create_author(author_string)
        if author:
            book_review.authors.append(author)
    # NOTE(review): this sorts on the raw `order` attribute, while build_book
    # sorts with Author.get_order() - confirm both give the same ordering.
    book_review.authors.sort(key=lambda person: person.order)

    book_review.year = row['Year of publication']
    parse_journal(row, book_review)

    book_review.volume = parse_volume(row)
    book_review.issue = parse_issue(row)
    book_review.pages = parse_pages(row)

    if not row['Record Nature'].startswith('Active'):
        book_review.active = False

    for citation in row['Related Citations'].split("//"):
        citation_id = ""
        # a citation counts as the reviewed book unless its type says otherwise
        is_reviewed_book = True
        for field in (piece.strip() for piece in citation.split("||")):
            if field.startswith("CCRType ") and field[len("CCRType "):].strip() != "Is Reviewed By":
                is_reviewed_book = False
            if field.startswith("CitationID "):
                citation_id = field[len("CitationID "):].strip()

        if is_reviewed_book:
            book_review.reviewed_book_id = citation_id
            break

    return book_review

The following cells deal with the classifications.

In [None]:
class Classification():
    """Plain record describing one classification (category) of a document.

    All text fields start empty and all numeric codes start at -1; the
    values are filled in by the code that parses the export.
    """

    def __init__(self):
        # text fields: raw code, printable code, category name, relation types
        self.code = self.code_print = self.name = ''
        self.acr_type = self.authority_type = ''
        # numeric fields: sort key plus main/sub section numbers
        self.code_for_sorting = self.code_main = self.code_sub = -1

In [None]:
def create_classification_object(class_code, doc):
    """Parse one '||'-delimited classification record into a Classification.

    A code with a single numeric part N becomes main section N, sub section
    0. A code "S-M" (two parts after splitting on '-') becomes main section
    M, sub section S, and is printed as "M-S".

    Returns None, after printing a message, when the code is not numeric
    (doc.category_number is then reset to "") or when the record is not an
    ACRType 'Category' / AuthorityType 'Classification Term' pair.
    """
    classi = Classification()

    for part in class_code.split('||'):
        # Match AND slice the stripped field. Slicing the unstripped text
        # (as before) corrupted values whenever a field carried more than
        # one leading whitespace character.
        field = part.strip()

        if field.startswith('ClassificationCode'):
            classi.code = field[len('ClassificationCode'):].strip()
            code_parts = [piece for piece in classi.code.split('-') if piece != ""]
            try:
                if len(code_parts) == 1:
                    classi.code_for_sorting = int(code_parts[0].strip())/1000
                    classi.code_print = code_parts[0].strip()
                    classi.code_main = int(code_parts[0].strip())
                    classi.code_sub = 0
                if len(code_parts) == 2:
                    # main section in the integer part, sub section in the fraction
                    classi.code_for_sorting = float(code_parts[1].strip()) + float(code_parts[0].strip())/1000
                    classi.code_print = code_parts[1].strip() + "-" + code_parts[0].strip()
                    classi.code_main = int(code_parts[1].strip())
                    classi.code_sub = int(code_parts[0].strip())
            except ValueError:
                # int()/float() reject a non-numeric code part
                print("Category code not valid " + classi.code)
                doc.category_number = ""
                return None

        if field.startswith('AuthorityName'):
            classi.name = field[len('AuthorityName'):].strip()

        if field.startswith('ACRType'):
            classi.acr_type = field[len('ACRType'):].strip()

        if field.startswith('AuthorityType'):
            classi.authority_type = field[len('AuthorityType'):].strip()

    if (classi.acr_type != 'Category' or classi.authority_type != 'Classification Term'):
        print("Skipping classification due to wrong type %s, %s."%(classi.acr_type, classi.authority_type))
        return None

    return classi

## Main

Below is the main code that reads and writes files and generates the bibliography.

Decide which type of doc to build:

In [None]:
def create_doc(row):
    """Build the publication object matching the row's 'Record Type'.

    Returns None for record types that have no builder. For every created
    document the subjects are parsed from the row as well.
    """
    builders = {
        "Book": build_book,
        "Article": build_article,
        "Chapter": build_chapter,
        "Thesis": build_thesis,
        "Multimedia Object": build_media,
        "Essay Review": build_essay_review,
        "Review": build_book_review,
    }
    builder = builders.get(row['Record Type'])
    doc = builder(row) if builder is not None else None

    if doc:
        parse_subjects(row, doc)
    return doc

Set basic info:

In [None]:
def set_basic_info(doc, row):
    """Copy the fields shared by all record types from the row onto doc.

    Sets database, category_number and print_date (both stripped), edition
    and description. HTML <em>...</em> markup in edition and description is
    converted to a LaTeX emph command; ampersands are escaped in the
    description only. Empty or None edition/description become "".
    """
    doc.database = row['Dataset']
    doc.category_number = row['CategoryNumbers'].strip()
    doc.print_date = row['Published Print'].strip()

    # "\\emph{" and "\\&" carry the same characters as the former "\emph{" and
    # "\&" literals, spelled without invalid escape sequences.
    edition = row['Edition Details']
    doc.edition = edition.replace("<em>", "\\emph{").replace("</em>", "}") if edition else ""

    description = row['Description']
    if description:
        description = description.replace("<em>", "\\emph{").replace("</em>", "}")
        description = description.replace("&", "\\&")
    doc.description = description if description else ""

In [None]:
def handle_classifications(row, doc, classifications_by_nr, docs_by_classification):
    """Attach the document's classification and register it in the lookup dicts.

    When IGNORE_CATEGORIES is set, every document gets the same dummy
    classification "0". Otherwise the '//'-separated records of the
    'CategoryNumbers' column are parsed. A document with more than one valid
    classification is reported as an error and left unclassified. A single
    classification with a printable code is appended to doc.classifications
    and the document is filed under its sort code in docs_by_classification;
    classifications_by_nr remembers the first Classification seen per code.
    """
    class_codes = row['CategoryNumbers'].split('//')
    doc.classifications = []
    all_classies = []

    if IGNORE_CATEGORIES:
        classi = Classification()
        classi.code_for_sorting = 0
        classi.code_print = "0"
        classi.code_main = 0
        classi.code_sub = 0
        classi.acr_type = 'Category'
        classi.authority_type = 'Classification Term'
        doc.category_number = "0"
        all_classies.append(classi)
    else:
        # create all classifications
        for class_code in class_codes:
            classi = create_classification_object(class_code, doc)
            if classi:
                all_classies.append(classi)

    if len(all_classies) > 1:
        # The condition is "more than one"; the message used to say
        # "more than two", which misdescribed the problem in the error file.
        errors.append(DocumentError(doc, "There is more than one classification.", "ERROR"))
    elif len(all_classies) == 1:
        classi = all_classies[0]
        if classi.code_print.strip():
            doc.classifications.append(classi)
            classifications_by_nr.setdefault(classi.code_for_sorting, classi)
            docs_by_classification.setdefault(classi.code_for_sorting, []).append(doc)

Function to check if document should be included based on database:

In [None]:
def include_based_on_database(doc):
    """Tell whether doc belongs to the dataset that is being printed.

    Always True when IGNORE_DATASET is set; otherwise the stripped
    doc.database must equal DATABASE_TO_PRINT.
    """
    if IGNORE_DATASET:
        return True
    return doc.database.strip() == DATABASE_TO_PRINT

Read csv file:

In [None]:
def read_csv(export_file, docs, docs_by_id, classifications_by_nr, docs_by_classification, reviews_by_book_id):
    """Read the export CSV and fill the given containers.

    Every row is turned into a document; rows with an unknown record type
    or with an id already present in docs_by_id are skipped. Book reviews
    are grouped by reviewed book id in reviews_by_book_id (only active ones
    that pass the print-date and dataset filters; a wrong dataset is logged
    as a warning). All other documents go into docs and docs_by_id and, if
    they belong to the printed dataset, get their classification handled.
    """
    with open(export_file, encoding='utf-8') as csvfile:
        for row in csv.DictReader(csvfile):
            doc = create_doc(row)
            if not doc or doc.isiscb_id in docs_by_id:
                continue

            set_basic_info(doc, row)

            if type(doc) in [BookReview]:
                # make sure the reviewed book has a (possibly empty) review list
                if not reviews_by_book_id.get(doc.reviewed_book_id, None):
                    reviews_by_book_id[doc.reviewed_book_id] = []

                if doc.active and include_based_on_print_date(doc.print_date) and include_based_on_database(doc):
                    reviews_by_book_id[doc.reviewed_book_id].append(doc)
                elif not include_based_on_database(doc):
                    errors.append(DocumentError(doc, "Document has wrong database (%s). Skipping."%(doc.database), "WARNING"))
                continue

            docs.append(doc)
            docs_by_id[doc.isiscb_id] = doc

            if include_based_on_database(doc):
                # classification
                handle_classifications(row, doc, classifications_by_nr, docs_by_classification)

In [None]:
def get_sort_by_value(doc):
    """Sort-key helper: return whatever doc.get_sort_by_value() yields."""
    sort_value = doc.get_sort_by_value()
    return sort_value

Create index for publications:

In [None]:
def create_index(docs_by_classification):
    """Number all printable documents consecutively, starting at 1.

    Classifications are visited in ascending sort-code order; the document
    list of each one is sorted in place with get_sort_by_value. A document
    receives the next number when it has a category number and passes the
    print-date and dataset filters.
    """
    next_number = 1
    for classi_code in sorted(docs_by_classification):
        bucket = docs_by_classification[classi_code]
        bucket.sort(key=get_sort_by_value)
        for doc in bucket:
            printable = doc.category_number and include_based_on_print_date(doc.print_date) and include_based_on_database(doc)
            if printable:
                doc.index = next_number
                next_number += 1
            elif not include_based_on_database(doc):
                # NOTE(review): read_csv only files documents here after the
                # same dataset check, so this branch looks unreachable - confirm.
                errors.append(DocumentError(doc, "Document has wrong database (%s). Skipping."%(doc.database), "WARNING"))


Add books to reviews:

In [None]:
def add_books_to_reviews(reviews_by_book_id, docs_by_id, all_book_reviews, book_reviews_obj_by_book_id):
    """Pair every reviewed book that is present in the export with its reviews.

    For each book id in reviews_by_book_id that is also in docs_by_id, a
    BookReviews object holding the book and its review list is appended to
    all_book_reviews and registered in book_reviews_obj_by_book_id under the
    book's id. Reviews of books missing from docs_by_id are left out.
    """
    for book_id, reviews in reviews_by_book_id.items():
        if book_id not in docs_by_id:
            continue
        book = docs_by_id[book_id]
        if not book:
            continue

        # NOTE(review): the review list may be empty here (read_csv creates
        # it before filtering) - confirm such entries are meant to be kept.
        bundle = BookReviews()
        bundle.book = book
        bundle.reviews = reviews
        all_book_reviews.append(bundle)
        book_reviews_obj_by_book_id[book.isiscb_id] = bundle

Create index for reviews:

In [None]:
def create_index_for_reviews(all_book_reviews):
    """Give each entry the index "R1", "R2", ... following the list order."""
    for position, book_reviews in enumerate(all_book_reviews, start=1):
        book_reviews.index = "R" + str(position)

Add books to index:

In [None]:
def add_to_index(doc, author_sorted_index, author_index, review_idx = -1):
    """Register the contributors of doc in the author index.

    author_sorted_index maps a contributor's sort name to the display name
    "Last, First" (the first display name seen for a sort name wins).
    author_index maps the display name to the list of entry numbers, as
    strings. The number recorded is doc.index, unless review_idx is given,
    in which case review_idx is recorded instead.
    """
    for contributor in doc.get_contributors_to_index():
        sort_key = contributor.get_name_for_sort()
        shown_as = ", ".join([contributor.get_last_name(), contributor.get_first_name()])
        author_sorted_index.setdefault(sort_key, shown_as)

        # reviews are referenced by their review number (R..), not by the book's number
        if review_idx == -1:
            entry_number = str(doc.index)
        else:
            entry_number = str(review_idx)
        author_index.setdefault(shown_as, []).append(entry_number)

In [None]:
def add_to_subject_index(doc, subject_sorted_index, subject_index):
    """Register the subjects of doc in the subject index.

    Subjects whose id is listed in SUBJECTS_TO_EXCLUDE are skipped.
    subject_sorted_index maps the lower-cased, ASCII-transliterated name to
    the Subject object (a later subject with the same key replaces the
    earlier one); subject_index maps the subject name to the list of
    document numbers, as strings.
    """
    for subject in doc.subjects:
        # predefined subjects are left out of the index
        if subject.id in SUBJECTS_TO_EXCLUDE:
            continue
        sort_key = unidecode(subject.name.lower())
        subject_sorted_index[sort_key] = subject
        subject_index.setdefault(subject.name, []).append(str(doc.index))

Print publications:

In [None]:
# LaTeX heading for a main classification section. print_publications fills
# the seven %s slots, in order, with: main code (section counter), main code
# (printed number), title (heading), title (left mark), title (right mark),
# main code (TOC number) and title (TOC entry).
heading_main_section_latex_template = """
\\addtocontents{toc}{\\vspace{1em}}\\setcounter{section}{%s}
\\section*{%s.\\hspace{0.5em}%s}
\\markboth{%s}{%s}
\\addcontentsline{toc}{section}{\\protect\\numberline{%s}\\textbf{%s}}
"""
    
# LaTeX heading for a sub-classification. print_publications fills the five
# %s slots, in order, with: sub code (subsection counter), printable code,
# sub title (heading), sub code (TOC number) and sub title (TOC entry).
heading_latex_template = """
\\setcounter{subsection}{%s}
\\vspace{-5pt}\\nopagebreak\\subsection*{%s.\\hspace{0.5em}%s}
\\addcontentsline{toc}{subsection}{\\protect\\numberline{%s.}%s}
"""

def print_publications(result_file_path, docs_by_classification, classifications_by_nr, author_sorted_index, author_index, subject_sorted_index, subject_index):
    """Write all printable publications, grouped by classification, as LaTeX.

    Classifications are processed in ascending sort-code order. A main
    section heading is written whenever the main code changes, and a
    sub-heading whenever the classification name has the form
    "sub -- main"; both only if the classification has something to print
    and IGNORE_CATEGORIES is off. A document is printed when it has a
    category number, passes the print-date filter and has an index > 0.
    Printed documents are added to the author index and, except for essay
    reviews, to the subject index.

    Besides its parameters this function reads the notebook-level globals
    docs_by_id (to resolve placeholder books and chapter ids) and
    book_reviews_obj_by_book_id (to hand reviews to render); both must
    exist before it is called.
    """
    def is_printable(candidate):
        # classified, not excluded by its print date, and numbered
        return candidate.category_number and include_based_on_print_date(candidate.print_date) and candidate.index > 0

    with open(result_file_path, 'w', encoding='utf-8') as result_file:
        # which one is the last main section seen
        last_main_sec = -1
        for classi_code in sorted(docs_by_classification):
            classification = classifications_by_nr[classi_code]
            docs_to_print = [d for d in docs_by_classification[classi_code] if is_printable(d)]

            # a name of the form "sub -- main" carries both heading levels
            main_title = classification.name
            sub_title = ""
            if " -- " in main_title:
                main_title = classification.name[classification.name.index(" -- ")+4:]
                sub_title = classification.name[:classification.name.index(" -- ")]

            if last_main_sec != classification.code_main and docs_to_print and not IGNORE_CATEGORIES:
                main_sec_heading = heading_main_section_latex_template%(str(classification.code_main), str(classification.code_main), main_title, main_title, main_title, str(classification.code_main), main_title)
                print("Writing main section: " + main_title)
                result_file.write("\n" + main_sec_heading + "\n")
                last_main_sec = classification.code_main

            if sub_title and docs_to_print and not IGNORE_CATEGORIES:
                heading = heading_latex_template%(str(classification.code_sub), classification.code_print, sub_title, str(classification.code_sub), sub_title)
                print("Writing " + sub_title)
                result_file.write(heading + "\n")

            for doc in docs_by_classification[classi_code]:
                # swap placeholder books for the full records where available
                if (type(doc) in [Chapter]) and doc.book.isiscb_id in docs_by_id:
                    book = docs_by_id[doc.book.isiscb_id]
                    if book:
                        doc.book = book
                if type(doc) == EssayReview:
                    final_book_list = []
                    for book in doc.books:
                        if book.isiscb_id in docs_by_id:
                            final_book_list.append(docs_by_id[book.isiscb_id])
                        else:
                            # Keep the placeholder book when the full record is
                            # not in the export. This used to append the essay
                            # review itself, which listed the review as its own
                            # reviewed book and indexed its authors twice.
                            final_book_list.append(book)
                    doc.books = final_book_list

                if type(doc) == Book and doc.chapter_ids:
                    for chap_id in doc.chapter_ids:
                        if chap_id in docs_by_id:
                            doc.chapters.append(docs_by_id[chap_id])

                # print all classified records (e.g., have Category Number)
                # that have not been previously printed (e.g., have Published Print data).
                if is_printable(doc):
                    print("Printing: " + str(doc.index))
                    reviews = []
                    if doc.isiscb_id in book_reviews_obj_by_book_id:
                        reviews = book_reviews_obj_by_book_id[doc.isiscb_id]
                    line = doc.render(doc.index, reviews=reviews).strip() + "\n"
                    result_file.write(line)

                    # add to author index
                    add_to_index(doc, author_sorted_index, author_index)
                    if type(doc) != EssayReview:
                        add_to_subject_index(doc, subject_sorted_index, subject_index)
                    else:
                        # reviewed books are indexed under the essay review's number
                        for book in doc.books:
                            add_to_index(book, author_sorted_index, author_index, doc.index)

Print reviews:

In [None]:
def print_reviews(reviews_file_path, all_book_reviews, author_sorted_index, author_index):
    """Write every book-review entry to the reviews file and index its people.

    Each entry is rendered with its own index and written on one line. The
    contributors of the reviewed book and of every review are added to the
    author index under the entry's review number.
    """
    with open(reviews_file_path, 'w', encoding='utf-8') as reviews_file:
        for book_reviews in all_book_reviews:
            print("Writing Reviews for" + str(book_reviews.book.isiscb_id))
            rendered = book_reviews.render(book_reviews.index).strip()
            reviews_file.write(rendered + "\n")

            # index the reviewed book first, then each of its reviews
            add_to_index(book_reviews.book, author_sorted_index, author_index, book_reviews.index)
            for single_review in book_reviews.reviews:
                add_to_index(single_review, author_sorted_index, author_index, book_reviews.index)

Write errors:

In [None]:
def print_errors(errors_file_path):
    """Write the collected errors to a CSV file.

    One header row is followed by one row per entry of the notebook-level
    `errors` list: document id, error type and message. The rows are
    produced with csv.writer so that messages containing commas or quotes
    (for example the dataset name in the "wrong database" warning) are
    quoted and stay in a single column; the previous hand-built lines
    split such messages across several columns.
    """
    # newline='' lets the csv module control line endings itself
    with open(errors_file_path, 'w', encoding='utf-8', newline='') as error_file:
        writer = csv.writer(error_file)
        writer.writerow(["Document Id", "Type", "Error Message"])
        for error in errors:
            writer.writerow([error.doc.isiscb_id, error.error_type, error.error])


Print author index:

In [None]:
def print_author_index(author_index_file_path, author_sorted_index, author_index):
    """Write the author index as LaTeX cbindex environments.

    Entries are written in ascending order of the sort names (the keys of
    author_sorted_index). Each entry shows the display name followed by the
    comma-separated entry numbers stored for it in author_index. Every sort
    name is also echoed to stdout.
    """
    index_template = "\n    \\begin{cbindex}%s %s\\end{cbindex}\n    "
    with open(author_index_file_path, 'w', encoding="utf-8") as index_file:
        for sort_name in sorted(author_sorted_index):
            print(sort_name)
            display_name = author_sorted_index[sort_name]
            numbers = ", ".join(author_index[display_name])
            index_file.write(index_template%(display_name, numbers) + "\n")


Print subject index:

In [None]:
def print_subject_index(subject_index_file_path, subjects_sorted_index, subject_index):
    """Write the subject index as LaTeX cbindex environments.

    Entries are written in ascending order of the keys of
    subjects_sorted_index. Each entry shows the subject's name followed by
    the comma-separated document numbers stored for that name in
    subject_index. Every sort key is also echoed to stdout.
    """
    index_template = "\n    \\begin{cbindex}%s %s\\end{cbindex}\n    "
    with open(subject_index_file_path, 'w', encoding="utf-8") as index_file:
        for sort_key in sorted(subjects_sorted_index):
            print(sort_key)
            subject_name = subjects_sorted_index[sort_key].name
            numbers = ", ".join(subject_index[subject_name])
            index_file.write(index_template%(subject_name, numbers) + "\n")

In [None]:
# Driver cell: runs the whole pipeline. The statements below depend on each
# other through these shared containers, so their order matters.

# all documents except book reviews, in file order
docs = []
# record id -> document (book reviews are not stored here)
docs_by_id = {}
# id of the reviewed book -> list of its book reviews
reviews_by_book_id = {}
# classification sort code -> Classification
classifications_by_nr = {}
# classification sort code -> list of documents filed under it
docs_by_classification = {}

# ------------ Reading publications ------------------
read_csv(export_file, docs, docs_by_id, classifications_by_nr, docs_by_classification, reviews_by_book_id)
create_index(docs_by_classification)                
    

# ------------- Dealing with reviews ----------------------
all_book_reviews = []
# book id -> BookReviews object; read as a global by print_publications
book_reviews_obj_by_book_id = {}

add_books_to_reviews(reviews_by_book_id, docs_by_id, all_book_reviews, book_reviews_obj_by_book_id)

# review numbers (R1, R2, ...) follow this sort order
all_book_reviews.sort(key=get_sort_by_value)
create_index_for_reviews(all_book_reviews)

# sort name -> display name of a contributor
author_sorted_index = {}
# display name -> list of entry numbers
author_index = {}

# the sorted dictionary will be used to sort all subjects
subject_sorted_index = {}
# this index will be used to hold the publication numbers for a subject
subject_index = {}

# ---------------- Print publications -------------
print_publications(result_file_path, docs_by_classification, classifications_by_nr, author_sorted_index, author_index, subject_sorted_index, subject_index)

# ---------------- Print reviews ---------------
print_reviews(reviews_file_path, all_book_reviews, author_sorted_index, author_index)
        
# ---------------- Print errors -----------------
print_errors(errors_file_path)

# ---------------- Print author index ---------------
print_author_index(author_index_file_path, author_sorted_index, author_index)

# ---------------- Print subject index --------------
print_subject_index(subject_index_file_path, subject_sorted_index, subject_index)