In [1]:
import ast
import copy
import numpy as np
import os

In [2]:
import cc.atlas as atlas
import cc.cartography as cartography
import cc.publication as publication
import cc.utils as utils
import cc.tex as tex

In [3]:
import trove

# Parameters

In [8]:
# Location of the trove config file. The default is the original author's local
# checkout; set the TROVE_CONFIG_FP environment variable to run the notebook
# against a config stored elsewhere without editing this cell.
config_fp = os.environ.get(
    'TROVE_CONFIG_FP',
    '/Users/zhafen/paper_repos/Hot-Accretion-in-FIRE/literature_review/literature_review.trove',
)
# cp gives raw access to the config's variations and options (used in a later cell);
# pm holds the parameters linked to the chosen variation.
cp = trove.config_parser.ConfigParser( config_fp )
pm = trove.link_params_to_config(
    config_fp,
    variation = 'angular_momentum_cancellation',
)

In [9]:
# Directory handed to atlas.Atlas as its first argument in the cells below.
atlas_dir = pm['root_data_dir']

In [10]:
# For every variation in the config, collect its seed publications and,
# when one is defined, its free-text search string.
topics = {}
search_strs = {}
for variation in cp.variations:
    # Option values are stored as Python literals, hence literal_eval.
    # 'publications' is required: a failure here is allowed to propagate.
    topics[variation] = ast.literal_eval( cp.get( variation, 'publications' ) )
    try:
        search_strs[variation] = ast.literal_eval( cp.get( variation, 'search_str' ) )
    except Exception:
        # 'search_str' is optional, so a variation without a usable one is skipped.
        # Catching Exception (not a bare except) keeps KeyboardInterrupt and
        # SystemExit from being silently swallowed.
        continue

## Load

### Zotero Atlas

In [11]:
# Load the saved Zotero atlas data from the configured file; bibtex loading is
# switched off here and done explicitly in the next cell.
a_zotero = atlas.Atlas(
    atlas_dir,
    data_fp = pm['zotero_atlas_fp'],
    load_bibtex = False,
)

Loading saved atlas data.


0it [00:00, ?it/s]
100%|██████████| 1904/1904 [00:00<00:00, 17049.00it/s]


In [12]:
# Import bibliography entries from the bibtex file named in the config.
# The recorded output shows that non-standard entry types (thesis, online, report) are not considered.
a_zotero.import_bibtex( pm['zotero_bibtex_fp'] )

Loading bibliography entries.


Entry type thesis not standard. Not considered.
Entry type online not standard. Not considered.
Entry type online not standard. Not considered.
Entry type online not standard. Not considered.
Entry type online not standard. Not considered.
Entry type online not standard. Not considered.
Entry type online not standard. Not considered.
Entry type online not standard. Not considered.
Entry type online not standard. Not considered.
Entry type online not standard. Not considered.
Entry type online not standard. Not considered.
Entry type online not standard. Not considered.
Entry type report not standard. Not considered.
Entry type online not standard. Not considered.
Entry type thesis not standard. Not considered.


Storing bibliography entries.


100%|██████████| 1889/1889 [00:00<00:00, 94667.90it/s]


In [13]:
# Process abstracts for the Zotero atlas. Per the recorded output this issues
# ADS calls (so it needs network access) and then runs an NLP step.
a_zotero.process_abstracts( identifier='from_citation' )

    Making 1 ADS calls...


100%|██████████| 1/1 [00:01<00:00,  1.47s/it]


    Making 25 ADS calls for publications without IDs...


100%|██████████| 25/25 [00:14<00:00,  1.68it/s]


    Doing NLP...


100%|██████████| 1904/1904 [00:00<00:00, 742879.52it/s]


In [14]:
# Write the processed atlas back to the same file it was loaded from.
a_zotero.save_data( fp=pm['zotero_atlas_fp'] )

Preparing to save data.


100%|██████████| 1904/1904 [00:00<00:00, 5559.58it/s]


Saving to /Users/zhafen/Data/hot_halo_accretion/literature_review/atlas_zotero.json


In [15]:
# Vectorize the Zotero atlas text (overwriting any existing projection at the
# configured path) and build a Cartographer from the returned keyword arguments.
vp_dict = a_zotero.vectorize(
    projection_fp = pm['zotero_projection_fp'],
    overwrite = True,
)
c_zotero = cartography.Cartographer( **vp_dict )

Vectorizing text...
    Retrieving publication data...


100%|██████████| 1904/1904 [00:00<00:00, 257852.66it/s]

    Calculating vectorization...





### Large Atlas

In [16]:
# Load the larger atlas from atlas_dir. No data_fp is given, so the Atlas
# default is used; bibtex loading is switched off.
a = atlas.Atlas(
    atlas_dir,
    load_bibtex = False,
)

Loading saved atlas data.


0it [00:00, ?it/s]
100%|██████████| 19540/19540 [00:00<00:00, 25575.25it/s]


In [17]:
# Update the larger atlas with the contents of the Zotero atlas.
a.update( a_zotero )

100%|██████████| 1904/1904 [00:00<00:00, 162197.47it/s]
100%|██████████| 17719/17719 [00:00<00:00, 126572.38it/s]


In [18]:
# Process abstracts for the combined atlas. Per the recorded output this issues
# ADS calls (so it needs network access) and then runs an NLP step.
a.process_abstracts( identifier='from_citation' )

    Making 1 ADS calls...


100%|██████████| 1/1 [00:00<00:00,  1.78it/s]


    Making 23 ADS calls for publications without IDs...


100%|██████████| 23/23 [00:11<00:00,  1.92it/s]


    Doing NLP...


100%|██████████| 18368/18368 [00:00<00:00, 753589.64it/s]


In [19]:
# Vectorize the combined atlas, overwriting any existing projection, and build
# the Cartographer used by the search loop below.
vp_dict = a.vectorize(
    overwrite = True,
)
c = cartography.Cartographer( **vp_dict )

Vectorizing text...
    Retrieving publication data...


100%|██████████| 18368/18368 [00:00<00:00, 100167.43it/s]


    Calculating vectorization...


# Search

In [20]:
# Iteratively grow the atlas around each topic's seed publications and, when the
# topic defines one, around a pseudo-publication built from its search string.
# NOTE: only ValueError and AssertionError are handled here. The recorded run shows
# a network ConnectionError from process_abstracts aborting the whole loop.
print( 'Expanding for topics...' )
for i in range( 3 ):
    print( '\n================================================================\nIteration {}'.format( i ) )
    # Seed publications whose expansion raised a ValueError in this iteration.
    broken = []
    for topic, pubs_i in topics.items():
        print( '\n================================================================\n{}'.format( topic ) )

        # Expand around each seed publication listed for the topic.
        for p_i in pubs_i:
            try:
                a = c.expand( a, center=p_i, n_sources_max=pm['kernel_size'] )
                a.process_abstracts()
            except ValueError:
                broken.append( p_i )
            except AssertionError:
                # Deliberately ignored, as in the original best-effort behaviour.
                pass

        if topic not in search_strs:
            continue

        # Search string: insert it into the atlas as an unofficial publication
        # stored under the topic name.
        print( 'Searching for {}'.format( search_strs[topic] ) )
        p = publication.UnofficialPublication( topic )
        p.process_abstract( abstract_str=search_strs[topic] )
        a.data[topic] = p

        # Re-vectorize so the cartographer knows about the new entry.
        vp_dict = a.vectorize( overwrite=True, projection_fp='pass' )
        c = cartography.Cartographer( **vp_dict )
        try:
            # Center on the search-string publication itself (keyed by topic).
            # The previous center=p_i reused the last seed publication from the
            # loop above, which is undefined or stale when pubs_i is empty.
            a = c.expand( a, center=topic, n_sources_max=pm['kernel_size'] )
            a.process_abstracts()
        except ( ValueError, AssertionError ):
            print( 'Search complete for search_str' )

    # Surface the failures instead of silently discarding the collected list.
    if broken:
        print( 'Expansion raised ValueError for: {}'.format( broken ) )

Expanding for topics...

Iteration 0

coronal_mixing_accretion
Expansion will include 164 new publications.
Loading bibliography entries.
Storing bibliography entries.


100%|██████████| 18767/18767 [00:00<00:00, 29928.08it/s]


    Making 14 ADS calls...


100%|██████████| 14/14 [01:30<00:00,  6.44s/it]


    Making 0 ADS calls for publications without IDs...


0it [00:00, ?it/s]


    Doing NLP...


100%|██████████| 19542/19542 [00:58<00:00, 335.89it/s] 


Expansion will include 1 new publications.
Loading bibliography entries.
Storing bibliography entries.


100%|██████████| 18768/18768 [00:00<00:00, 20447.97it/s]


    Making 1 ADS calls...


  0%|          | 0/1 [00:00<?, ?it/s]


ConnectionError: HTTPSConnectionPool(host='api.adsabs.harvard.edu', port=443): Max retries exceeded with url: /v1/search/query/?q=bibcode%3A%222018MNRAS.477.2716K%22+OR+bibcode%3A%22Lanson2008%22+OR+bibcode%3A%22Feige2011%22+OR+bibcode%3A%22Zanotti2010%22+OR+bibcode%3A%22Riedl2006%22+OR+bibcode%3A%22Siekmann1991%22+OR+bibcode%3A%22Hartigan1985%22+OR+bibcode%3A%22Kepler2016%22+OR+bibcode%3A%22Chan2017%22+OR+bibcode%3A%22Scheufele1999%22+OR+bibcode%3A%22Anderson2016%22+OR+bibcode%3A%22VanDeVoort2017a%22+OR+bibcode%3A%22Parker1965%22+OR+bibcode%3A%22Klein2003%22+OR+bibcode%3A%22Kaplan1958%22+OR+bibcode%3A%22Turnbull1976%22+OR+bibcode%3A%22Pillepich2017%22+OR+bibcode%3A%22Smagorinsky1963%22+OR+bibcode%3A%22Whittaker2000%22+OR+bibcode%3A%22Runeson2006%22+OR+bibcode%3A%22Coelho2017%22+OR+bibcode%3A%22Cen2001%22+OR+bibcode%3A%22Varotsis2018%22+OR+bibcode%3A%22Draine2011%22+OR+bibcode%3A%22Pirker2015%22+OR+bibcode%3A%22Deterding2011%22+OR+bibcode%3A%22Reiners2015%22+OR+bibcode%3A%22Yarkoni2019%22+OR+bibcode%3A%22Hanes1940%22+OR+bibcode%3A%22Schilling2005%22+OR+bibcode%3A%22Price1976%22+OR+bibcode%3A%22Lam2015%22+OR+bibcode%3A%22Kluyver2016%22+OR+bibcode%3A%22DeSollaPrice1989%22+OR+bibcode%3A%22Vinkers2015%22+OR+bibcode%3A%22Fortunato2018%22+OR+bibcode%3A%22Sinatra2016%22+OR+bibcode%3A%22Strevens2003%22+OR+bibcode%3A%22Kitcher1990%22+OR+bibcode%3A%22Young2020%22+OR+bibcode%3A%22Steegen2016%22+OR+bibcode%3A%22Shockley1957%22+OR+bibcode%3A%22Henderson1990%22+OR+bibcode%3A%22Azoulay2011%22+OR+bibcode%3A%22Small1973%22+OR+bibcode%3A%22Uzzi2005%22+OR+bibcode%3A%22Falk-Krzesinski2011%22+OR+bibcode%3A%22Stokols2008%22+OR+bibcode%3A%22Fiore2008%22+OR+bibcode%3A%22Research2004%22+OR+bibcode%3A%22Fleming2001%22+OR+bibcode%3A%22Schilling2011%22+OR+bibcode%3A%22Cluley2012%22+OR+bibcode%3A%22JONES2009%22+OR+bibcode%3A%22Weitzman1998%22+OR+bibcode%3A%22West2021%22+OR+bibcode%3A%22Kimm2011b%22+OR+bibcode%3A%22Rennehan2021%22+OR+bibcode%3A%22Whitcomb2013%22+OR+bibcode%3A%22CANaturalResource
sAgency%22+OR+bibcode%3A%22HarmsworthAssociates2010%22+OR+bibcode%3A%22Weis2021%22+OR+bibcode%3A%22Sanchez2020%22+OR+bibcode%3A%22Yu2021%22+OR+bibcode%3A%22Richings2020%22+OR+bibcode%3A%22Lim2020a%22+OR+bibcode%3A%22Collins2021%22+OR+bibcode%3A%22Haven2021%22+OR+bibcode%3A%22Wang2016a%22+OR+bibcode%3A%22Etemadpour2016%22+OR+bibcode%3A%22Paulovich2008%22+OR+bibcode%3A%22Chawla2021%22+OR+bibcode%3A%22Stringer2010%22&fl=abstract&fl=citation&fl=reference&fl=entry_date&fl=author&fl=volume&fl=page&fl=identifier&fl=bibcode&rows=300&cursorMark=%2A&sort=score+desc%2Cid+desc (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x7f80084ba370>: Failed to establish a new connection: [Errno 8] nodename nor servname provided, or not known'))

In [None]:
# Save the expanded atlas. No fp is passed, so the Atlas's default save location
# is used (presumably the file it was loaded from; TODO confirm).
a.save_data()