In [1]:
import bibtexparser

## Import from .bib file using bibtexparser

In [41]:
# Parser configuration:
#   common_strings=True            -> load the standard BibTeX @string macros
#                                     (e.g. month abbreviations) before parsing.
#   ignore_nonstandard_types=False -> keep entries whose type is not one of the
#                                     standard BibTeX entry types.
parser = bibtexparser.bparser.BibTexParser(common_strings=True, ignore_nonstandard_types=False)

# Read the file as UTF-8 explicitly so accented author names decode the same
# way on every platform instead of depending on the locale's default encoding.
with open("cv-academic/cv.bib", encoding="utf-8") as bibtex_file:
    publications = bibtexparser.load(bibtex_file, parser=parser)

publications.entries

[{'url': 'https://www.sciencedirect.com/science/article/pii/S2211675320300038',
  'publisher': 'Elsevier',
  'year': '2020',
  'pages': '100409',
  'journal': 'Spatial Statistics',
  'author': 'Wang, Yu and Le, Nhu D and Zidek, James V',
  'keywords': 'article',
  'title': 'Approximately optimal spatial design: How good is it?',
  'ENTRYTYPE': 'article',
  'ID': 'wang2020approximately'},
 {'url': 'https://onlinelibrary.wiley.com/doi/full/10.1002/env.2483',
  'publisher': 'Wiley Online Library',
  'year': '2018',
  'pages': 'e2483',
  'number': '1',
  'volume': '29',
  'journal': 'Environmetrics',
  'author': 'Casquilho-Resende, CM and Le, ND and Zidek, JV and Wang, Y',
  'keywords': 'article',
  'title': 'Design of monitoring networks using k-determinantal point processes',
  'ENTRYTYPE': 'article',
  'ID': 'casquilho2018design'},
 {'url': 'http://proceedings.mlr.press/v108/wang20d.html',
  'publisher': 'PMLR',
  'month': 'August',
  'series': 'Proceedings of Machine Learning Research'

## Escape special characters

In [42]:
# Characters that are swapped for an HTML entity by `html_escape`.
# Anything not listed here (including `<` and `>`) is passed through as-is.
html_escape_table = {
    "&": "&amp;",
    '"': "&quot;",
    "'": "&apos;",
}


def html_escape(text):
    """Return `text` with `&`, `"` and `'` replaced by HTML entities.

    Each character is looked up in `html_escape_table`; characters without
    an entry are copied to the result unchanged.
    """
    escaped = []
    for ch in text:
        escaped.append(html_escape_table.get(ch, ch))
    return "".join(escaped)

## Extract authors list

In [43]:
def parse_author(a, keyfirst="Yu", keylast="Wang"):
    """Format a single BibTeX author name as `F. M. Last`.

    Accepted input forms:

    * `Last, First` / `Last, First Middle` (the usual BibTeX form; the middle
      name may be a full name or just an initial),
    * `Last, Jr, First Middle` (the suffix is appended after the last name),
    * `First Middle Last` (no comma; the final token is taken as the last name),
    * a single token such as an institutional author, returned unchanged.

    Whitespace around the comma and between name parts is irrelevant, so
    `Wang,Yu` and `Le,  Nhu  D ` are handled as well.

    Only the first given name and the first middle name contribute an
    initial; any further given names are ignored.

    Parameters
    ----------
    a : str
        One author name (the caller splits the author list on ` and `).
    keyfirst, keylast : str
        First and last name of the author to highlight. The match is an exact
        comparison against the full first given name and the last name.

    Returns
    -------
    str
        The formatted name, wrapped in `<b><i>...</i></b>` when it matches
        `keyfirst`/`keylast`.
    """
    name = a.strip()

    if ',' in name:
        parts = [part.strip() for part in name.split(',')]
        last = parts[0]
        # With two commas the middle part is a suffix such as "Jr".
        suffix = parts[1] if len(parts) > 2 else ''
        # str.split() with no argument collapses runs of whitespace and never
        # yields empty tokens, so indexing [0] on a token is always safe.
        given = parts[-1].split()
    else:
        tokens = name.split()
        last = tokens[-1] if tokens else ''
        suffix = ''
        given = tokens[:-1]

    if not given:
        # No given name to abbreviate (e.g. an institutional author).
        return last

    pieces = [given[0][0] + '.']
    if len(given) > 1:
        pieces.append(given[1][0] + '.')
    pieces.append(last)
    if suffix:
        pieces.append(suffix)
    formatted = ' '.join(pieces)

    if last == keylast and given[0] == keyfirst:
        return '<b><i>' + formatted + '</i></b>'
    return formatted

## Creating markdown files

In [44]:
# Preview what one entry looks like once the `plain_*` fields are added.
# add_plaintext_fields writes its new keys into the dict it is given, so pass
# a shallow copy: otherwise this preview would permanently alter
# publications.entries[8] and later cells would see `plain_plain_*` keys.
p = dict(publications.entries[8])
p = bibtexparser.customization.add_plaintext_fields(p)
p

{'year': '2020',
 'title': 'The Sylvester Graphical Lasso (SyGlasso)',
 'booktitle': 'Conference proceedings talk at the AISTATS 2020, Palermo, Italy',
 'month': 'August',
 'keywords': 'conference, contributed',
 'author': 'Wang, Yu',
 'ENTRYTYPE': 'inproceedings',
 'ID': 'wang2020aistats20',
 'plain_year': '2020',
 'plain_title': 'The Sylvester Graphical Lasso (SyGlasso)',
 'plain_booktitle': 'Conference proceedings talk at the AISTATS 2020, Palermo, Italy',
 'plain_month': 'August',
 'plain_keywords': 'conference, contributed',
 'plain_author': 'Wang, Yu',
 'plain_ENTRYTYPE': 'inproceedings',
 'plain_ID': 'wang2020aistats20'}

In [45]:
import os
from datetime import datetime

def _month_number(value):
    """Convert a BibTeX month (full name, abbreviation, or 1-12) to an int.

    Raises ValueError when the value is none of those.
    """
    text = str(value).strip()
    if text.isdigit():
        number = int(text)
        if 1 <= number <= 12:
            return number
        raise ValueError("Month out of range: " + repr(value))
    for fmt in ("%B", "%b"):
        try:
            return datetime.strptime(text, fmt).month
        except ValueError:
            continue
    raise ValueError("Unrecognised month: " + repr(value))


def _yaml_double_quoted(text):
    """Wrap `text` in double quotes for YAML, escaping backslashes and double quotes."""
    return '"' + text.replace('\\', '\\\\').replace('"', '\\"') + '"'


def _yaml_single_quoted(text):
    """Wrap `text` in single quotes for YAML, doubling any embedded single quote."""
    return "'" + text.replace("'", "''") + "'"


# Write one markdown file (YAML front matter + optional abstract) per entry.
for item in publications.entries:

    # Work on a shallow copy: add_plaintext_fields adds its `plain_*` keys to
    # the dict it receives, so re-running this cell on the original entries
    # would keep piling up `plain_plain_*` keys.
    item = bibtexparser.customization.add_plaintext_fields(dict(item))
    year = item['plain_year'] if 'plain_year' in item else item['plain_date'].split('-')[0]
    key = item['plain_ID']

    md_filename = key + ".md"
    html_filename = key

    ## YAML variables

    md = "---\ntitle: " + _yaml_double_quoted(item['plain_title']) + "\n"

    md += "collection: publications"

    md += "\npermalink: /publication/" + html_filename

    # Entries without a month/day default to mid-year / mid-month.
    month = _month_number(item['plain_month']) if 'plain_month' in item else 6
    day = item['plain_day'] if 'plain_day' in item else 15
    # Emit `YYYY-MM-DD 00:00:00 +0500`.
    date = datetime(int(year), int(month), int(day)).date().isoformat() + " 00:00:00 +0500"

    md += "\ndate: " + date

    # Venue: first matching field wins.
    if 'plain_journal' in item:
        venue = item['plain_journal']
    elif 'plain_eventtitle' in item:
        venue = item['plain_eventtitle']
    elif 'plain_booktitle' in item:
        venue = item['plain_booktitle']
    elif 'plain_journaltitle' in item:
        venue = item['plain_journaltitle']
    elif 'plain_school' in item:
        # Prefer `institution` when both are present, but fall back to
        # `school` instead of raising KeyError for theses that only have it.
        venue = item.get('plain_institution', item['plain_school'])
    else:
        venue = False

    if 'plain_note' in item:
        note_html = "(<b><i>" + item['plain_note'] + "</i></b>)"
        # An entry can have a note but no venue; use the note on its own then.
        venue = venue + " " + note_html if venue else note_html

    if venue:
        md += "\nvenue: " + _yaml_single_quoted(html_escape(venue))

    if 'plain_url' in item:
        md += "\npaperurl: " + _yaml_single_quoted(item['plain_url'])

    if 'plain_doi' in item:
        md += "\ndoi: " + _yaml_single_quoted(item['plain_doi'])

    # `keywords` is required: it is written out verbatim as the publication type.
    md += "\npubtype: " + _yaml_single_quoted(item['plain_keywords'])

    authors = ', '.join([parse_author(a) for a in item['plain_author'].split(' and ')])
    md += "\nauthors: " + _yaml_single_quoted(authors)

    md += "\nexcerpt_separator: \"\""

    md += "\n---"

    ## Markdown description for individual page

    if 'plain_abstract' in item:
        md += "\n" + html_escape(item['plain_abstract']) + "\n"

    # Strip any directory component so the file always lands in the current
    # working directory, whatever the BibTeX key contains.
    md_filename = os.path.basename(md_filename)

    with open(md_filename, 'w', encoding='utf-8') as f:
        f.write(md)