In [1]:
import ast
import copy
import numpy as np
import os

In [2]:
import cc.atlas as atlas
import cc.cartography as cartography
import cc.publication as publication
import cc.utils as utils
import cc.tex as tex

In [3]:
import trove

# Literature Review

This notebook contains a record of how I performed the literature review for this work.

# Parameters

In [4]:
# Path to the trove config file that drives this literature review.
# The default is the original author's local checkout; set the
# LITERATURE_REVIEW_CONFIG environment variable to point at a different copy
# without editing this cell.
config_fp = os.environ.get(
    'LITERATURE_REVIEW_CONFIG',
    '/Users/zhafen/paper_repos/cgm_modeling_challenge_paper/literature_review/literature_review.trove',
)
# Parser over the config; used below to iterate over its variations.
cp = trove.config_parser.ConfigParser( config_fp )
# Parameter lookup ( pm['...'] ) built from the same config file.
pm = trove.link_params_to_config(
    config_fp,
)

In [5]:
# Root data directory taken from the linked parameters; passed to atlas.Atlas
# as its first argument when the base library is loaded below.
atlas_dir = pm['root_data_dir']

In [6]:
# Map every variation listed in the config to its 'publications' entry.
# The entry is stored as text, so literal_eval parses it into the Python
# literal it spells out.
topics = {
    variation: ast.literal_eval( cp.get( variation, 'publications' ) )
    for variation in cp.variations
}

# Setup

# Extensive Survey
Get all papers that might be of relevance.
We'll later identify the subset that is of greater interest.

## Base Library
My existing library of papers.

In [7]:
# Open the atlas for the existing library from its saved data file.
# load_bibtex=False: the bibtex file is imported explicitly in the next cell.
a_zotero = atlas.Atlas( atlas_dir, data_fp=pm['zotero_atlas_fp'], load_bibtex=False, )

Loading saved atlas data.


0it [00:00, ?it/s]
100%|███████████████████████████████████| 2537/2537 [00:00<00:00, 17845.95it/s]


In [8]:
# Load and store the bibliography entries from the configured bibtex file.
# Per the logged output, non-standard entry types (online, thesis, software)
# are reported and not considered.
a_zotero.import_bibtex( pm['zotero_bibtex_fp'] )

Entry type online not standard. Not considered.


Loading bibliography entries.


Entry type online not standard. Not considered.
Entry type online not standard. Not considered.
Entry type online not standard. Not considered.
Entry type online not standard. Not considered.
Entry type thesis not standard. Not considered.
Entry type online not standard. Not considered.
Entry type software not standard. Not considered.
Entry type software not standard. Not considered.
Entry type thesis not standard. Not considered.
Entry type online not standard. Not considered.
Entry type online not standard. Not considered.
Entry type online not standard. Not considered.
Entry type online not standard. Not considered.
Entry type online not standard. Not considered.
Entry type online not standard. Not considered.
Entry type online not standard. Not considered.
Entry type online not standard. Not considered.
Entry type online not standard. Not considered.


Storing bibliography entries.


100%|██████████████████████████████████| 2526/2526 [00:00<00:00, 199826.71it/s]


In [9]:
# Process abstracts for the atlas entries. Per the logged output this makes
# ADS calls (network access) and then runs an NLP pass over the publications.
# NOTE(review): the exact meaning of identifier='from_citation' is defined in
# cc.atlas — confirm there.
a_zotero.process_abstracts( identifier='from_citation' )

    Making 1 ADS calls...


100%|████████████████████████████████████████████| 1/1 [00:01<00:00,  1.98s/it]


    Making 27 ADS calls for publications without IDs...


100%|██████████████████████████████████████████| 27/27 [00:16<00:00,  1.65it/s]


    Doing NLP...


100%|████████████████████████████████████| 2542/2542 [00:00<00:00, 7786.29it/s]


In [10]:
# Write the processed atlas back to the same file it was loaded from
# ( pm['zotero_atlas_fp'] ), so the ADS/NLP results are available next time.
a_zotero.save_data( fp=pm['zotero_atlas_fp'] )

Preparing to save data.


100%|██████████████████████████████████| 2542/2542 [00:00<00:00, 231403.60it/s]


Saving to /Users/zhafen/Data/cgm_modeling_challenge/literature_review/atlas_zotero.json


In [11]:
# Vectorize the text of the publications in the base-library atlas.
# overwrite=True presumably replaces any projection already stored at
# pm['zotero_projection_fp'] — confirm in cc.atlas.
vp_dict = a_zotero.vectorize( projection_fp=pm['zotero_projection_fp'], overwrite=True )
# Build the cartographer from the vectorization output, passed as keyword arguments.
c_zotero = cartography.Cartographer( **vp_dict )

Vectorizing text...
    Retrieving publication data...


100%|██████████████████████████████████| 2542/2542 [00:00<00:00, 278146.74it/s]

    Calculating vectorization...



100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 6/6 [00:00<00:00, 49.31it/s]


## Search Centered on Abstract
Perform a search centered on the abstract of the paper we are writing.

In [None]:
# Open the TeX source of the paper being written, at the configured path.
# Its raw text is read through pub_doc.string in the next cell.
pub_doc = tex.Tex( filepath=pm['tex_fp'] )

In [None]:
# Pull out the text between \begin{abstract} and \end{abstract}.
# Indexing [1] raises IndexError if the opening marker is absent from the source.
text_after_begin = pub_doc.string.split( '\\begin{abstract}' )[1]
abstract = text_after_begin.split( '\\end{abstract}' )[0]

In [None]:
# Display the extracted abstract as a visual check that the split captured
# the intended text.
print( abstract )

### Add Abstract to Atlas

In [None]:
# Create a publication object for the paper being written, identified by the
# citation key from the config. It is used below as the center of each search.
p = publication.UnofficialPublication( pm['citation_key'] )

In [None]:
# Feed the extracted abstract text to the publication object.
# NOTE(review): presumably this runs the same kind of abstract processing as
# for atlas entries — confirm in cc.publication.
p.process_abstract( abstract_str = abstract )

In [None]:
# Register the paper in the base-library atlas under its citation key.
# A deep copy is stored so the atlas entry does not share state with p.
a_zotero.data[p.citation_key] = copy.deepcopy( p )

In [None]:
# Re-run the vectorization now that the paper's own entry has been added to
# the atlas, reusing the same projection file with overwrite=True.
vp_dict = a_zotero.vectorize( projection_fp=pm['zotero_projection_fp'], overwrite=True )
# Rebuild the cartographer from the fresh vectorization output.
c_zotero = cartography.Cartographer( **vp_dict )

### Perform the Search

#### First Search

In [None]:
# First expansion: expand the base-library atlas, centered on the paper's
# citation key, and bind the result to a new name `a`.
# NOTE(review): how expand() selects publications is defined in cc.cartography.
a = c_zotero.expand( a_zotero, center=p.citation_key )

In [None]:
# Process abstracts for the expanded atlas, then persist it.
# NOTE(review): no path is passed to save_data(), so the destination is
# whatever the atlas defaults to — confirm in cc.atlas.
a.process_abstracts()
a.save_data()

#### Second Search

In [None]:
# Vectorize the expanded atlas so the next search works from its current contents.
vp_dict = a.vectorize( overwrite=True )
# Cartographer over the expanded atlas; note this is `c`, distinct from `c_zotero`.
c = cartography.Cartographer( **vp_dict )

In [None]:
# Second expansion, again centered on the paper's citation key.
# This reads `a` and rebinds `a`, so re-running the cell expands the already
# expanded atlas rather than repeating the same step.
a = c.expand( a, center=p.citation_key )

In [None]:
# Process abstracts for the atlas after the second expansion, then persist it.
# NOTE(review): save_data() is called without a path; destination is the
# atlas default — confirm in cc.atlas.
a.process_abstracts()
a.save_data()

#### Third Search

In [None]:
# Re-vectorize after the second expansion so the third search uses the
# atlas's current contents.
vp_dict = a.vectorize( overwrite=True )
# Replace the previous cartographer with one built from the new vectorization.
c = cartography.Cartographer( **vp_dict )

In [None]:
# Third expansion, centered on the paper's citation key.
# As in the second search, `a` is both the input and the rebound result, so
# the cell is not idempotent on re-run.
a = c.expand( a, center=p.citation_key )

In [None]:
# Process abstracts for the atlas after the third expansion, then persist it.
# NOTE(review): save_data() is called without a path; destination is the
# atlas default — confirm in cc.atlas.
a.process_abstracts()
a.save_data()

#### Finish Up

In [None]:
# Final vectorization of the fully expanded atlas.
vp_dict = a.vectorize( overwrite=True )
# Final cartographer; its `publications` attribute is counted in the next cell.
c = cartography.Cartographer( **vp_dict )

In [None]:
# Report how many publications the final cartographer holds.
n_publications = c.publications.size
print( 'We now have {} related publications to search through!'.format( n_publications ) )