In [1]:
import ast
import copy
import numpy as np
import os

In [2]:
import cc.atlas as atlas
import cc.cartography as cartography
import cc.publication as publication
import cc.utils as utils
import cc.tex as tex

In [3]:
import trove
import verdict

# Literature Review

This notebook contains a record of how I performed the literature review for this work.

# Parameters

In [4]:
# Location of the trove config file for this literature review.
# The LITERATURE_REVIEW_CONFIG environment variable overrides the default
# below, so the notebook can run on machines where that absolute path
# does not exist.
config_fp = os.environ.get(
    'LITERATURE_REVIEW_CONFIG',
    '/Users/zhafen/paper_repos/Hot-Accretion-in-FIRE/literature_review/literature_review.trove',
)
cp = trove.config_parser.ConfigParser( config_fp )
# Parameters linked to the config for the variation reviewed in this notebook.
pm = trove.link_params_to_config(
    config_fp,
    variation = 'thin_disk_galaxies',
)

In [5]:
# The atlases below are loaded from the configured root data directory.
atlas_dir = pm['root_data_dir']

In [6]:
# Map each variation named in the config to its parsed 'publications' entry.
topics = dict()
for variation in cp.variations:
    publications_literal = cp.get( variation, 'publications' )
    # The entry is evaluated as a Python literal.
    topics[variation] = ast.literal_eval( publications_literal )

# Load Data

## Topics Data

In [7]:
# Per-topic bookkeeping lives in a JSON file inside the root data directory.
topics_fp = os.path.join( pm['root_data_dir'], 'topics.json' )
topics_data = verdict.Dict.from_json(
    topics_fp,
    create_nonexisting = True,
)
# Make sure the container for deliberately excluded publications exists.
if not ( 'not_included' in topics_data ):
    topics_data['not_included'] = {}

## Zotero Atlas

In [8]:
# Zotero atlas, loaded from the data file given by pm['zotero_atlas_fp'].
a_zotero = atlas.Atlas(
    atlas_dir,
    load_bibtex = False,
    data_fp = pm['zotero_atlas_fp'],
)

Loading saved atlas data.


0it [00:00, ?it/s]
100%|██████████| 1904/1904 [00:00<00:00, 14352.72it/s]


In [9]:
# Vectorize the Zotero atlas, then build its cartographer from the result.
vp_dict = a_zotero.vectorize(
    projection_fp = pm['zotero_projection_fp'],
)
c_zotero = cartography.Cartographer( **vp_dict )

Vectorizing text...
Using saved vectorized text...


  if hasattr( a[first_element_index][0], 'decode' ):


## Large Atlas

In [10]:
# Larger atlas; no data_fp is given, so the Atlas default location is used.
a = atlas.Atlas(
    atlas_dir,
    load_bibtex = False,
)

Loading saved atlas data.


0it [00:00, ?it/s]
100%|██████████| 19540/19540 [00:03<00:00, 5713.64it/s]


In [11]:
# Vectorize the larger atlas and build the cartographer used for the similarity rankings below.
vp_dict = a.vectorize()
c = cartography.Cartographer( **vp_dict )

Vectorizing text...
Using saved vectorized text...


## Tex Draft

In [12]:
# Load the paper draft; its text (pub_doc.string) is searched later to see
# which publication keys already appear in it.
pub_doc = tex.Tex( filepath=pm['tex_fp'] )

# Create a Reading List

## Setup

In [13]:
# Start an empty exclusion list for this variation if none has been recorded yet.
if pm['variation'] not in topics_data['not_included']:
    topics_data['not_included'][pm['variation']] = []

In [14]:
def sort_by_similarity( target_key, c ):
    '''Rank every publication in a cartographer by similarity to one publication.

    Args:
        target_key: Key of the publication to compare against; passed to
            c.cospsi as its first argument.
        c: Object providing cospsi( target_key, 'all' ), which must return a
            1D numeric array, and a `publications` array that can be indexed
            with an integer index array.

    Returns:
        sorted_cospsi: The cospsi values in descending order, with any NaN
            values placed last.
        sorted_publications: The entries of c.publications in the same order.
    '''

    cospsi = c.cospsi( target_key, 'all' )

    # argsort puts NaN last in ascending order, so a plain reversal would rank
    # NaN similarities as the most related. Reverse, then move NaN to the end.
    sort_inds = np.argsort( cospsi )[::-1]
    is_nan = np.isnan( cospsi[sort_inds] )
    sort_inds = np.concatenate( [ sort_inds[~is_nan], sort_inds[is_nan] ] )

    sorted_cospsi = cospsi[sort_inds]
    sorted_publications = c.publications[sort_inds]

    return sorted_cospsi, sorted_publications

In [62]:
def print_sorted_publications(
    sorted_cospsi,
    sorted_publications,
    kernel_size,
    a,
    show_unread_only = False,
    do_not_show_included = True,
    do_not_show_not_included = True,
    central_publication = '',
):
    '''Print a reading list of the publications most related to a central one.

    Relies on the notebook-level names pub_doc, topics_data and pm.

    Args:
        sorted_cospsi: Similarities matching sorted_publications. Not used
            by this function.
        sorted_publications: Publication keys, most related first.
        kernel_size: Highest related rank examined; ranks 0 through
            kernel_size (inclusive) are considered.
        a: Atlas-like object. Keys missing from a.data are skipped.
        show_unread_only: If True, skip publications whose 'read' note is
            anything other than 'UNREAD' (a missing note counts as 'UNREAD').
        do_not_show_included: If True, skip publications whose key appears
            in pub_doc.string.
        do_not_show_not_included: If True, skip keys listed in
            topics_data['not_included'] for the current variation.
        central_publication: Label printed after the related rank.
    '''

    n_shown = 0
    for i, key_i in enumerate( sorted_publications ):

        if i > kernel_size:
            break

        if key_i not in a.data:
            continue

        p_i = a[key_i]

        read_flag = p_i.notes['read'] if 'read' in p_i.notes else 'UNREAD'
        if show_unread_only and read_flag != 'UNREAD':
            continue

        included_flag = key_i in pub_doc.string
        deliberately_not_included = key_i in topics_data['not_included'][pm['variation']]

        if do_not_show_included and included_flag:
            continue
        if do_not_show_not_included and deliberately_not_included:
            continue

        # Unofficial publications are never printed, so they must not advance
        # the displayed counter either.
        if isinstance( p_i, publication.UnofficialPublication ):
            continue

        print( '{} -- {}'.format( n_shown, key_i, ) )
        print( p_i.citation['title'] )
        print( p_i.citation['author'] )
        print( 'Related rank: {}.{}'.format( i, central_publication ) )
        print( 'Read: {}'.format( ''.join( read_flag ) ) )
        print( 'Included: {}'.format( included_flag ) )
        print( p_i.citation['ENTRYTYPE'] + '\n' )
        print( p_i.points_str() + '\n\n' )

        n_shown += 1



## Conservative Reading List
A reading list of the publications most similar to the average abstract of the topic's publications, optionally combined with chosen search words.

### Create and add average vector

In [16]:
# Starting text for the search; chosen words can be added here if so wished.
# Text from the topic's publications is appended to it in the following cells.
search_str = ''

In [17]:
# Append the points_str() text of every publication in this variation.
# NOTE: this uses +=, so re-running the cell appends the text a second time;
# re-run the cell that defines search_str first.
for i, key_i in enumerate( pm['publications'] ):
    search_str += a[key_i].points_str()

In [18]:
# Append the optional extra search text from the parameters, if one is set.
if 'search_str' in pm:
    search_str += pm['search_str']

In [19]:
# Stand-in publication, created with the variation name, that will carry the combined search text.
p = publication.UnofficialPublication( pm['variation'] )

In [20]:
# Use the combined search text as this publication's abstract.
p.process_abstract( abstract_str=search_str )

In [21]:
# Store it in the atlas under the variation name, which is later used as the similarity target key.
a.data[pm['variation']] = p

In [22]:
# Merge the Zotero atlas into the larger atlas, then prune duplicates with
# the Zotero keys passed as the preferred ones.
a.update( a_zotero )
zotero_keys = list( a_zotero.data.keys() )
a.prune_duplicates( preferred=zotero_keys )

100%|██████████| 1904/1904 [00:00<00:00, 84002.55it/s]
100%|██████████| 17719/17719 [00:00<00:00, 112620.22it/s]
100%|██████████| 1816/1816 [00:00<00:00, 113941.21it/s]
100%|██████████| 16553/16553 [00:00<00:00, 111680.87it/s]


In [23]:
# Re-vectorize (overwrite=True) now that the atlas contents have changed, then rebuild the cartographer.
# NOTE(review): projection_fp='pass' presumably skips the saved projection file — confirm.
vp_dict = a.vectorize( overwrite=True, projection_fp='pass', )
c = cartography.Cartographer( **vp_dict )

Vectorizing text...
    Retrieving publication data...


100%|██████████| 18369/18369 [00:00<00:00, 126403.85it/s]

    Calculating vectorization...





### Produce reading list

In [89]:
# Rank all publications against the search entry stored under the variation name.
sorted_cospsi, sorted_publications = sort_by_similarity( pm['variation'], c )

In [90]:
# Load the draft again — presumably to pick up edits made since the last load (TODO confirm).
pub_doc = tex.Tex( filepath=pm['tex_fp'] )

In [109]:
# Mark this publication as deliberately not included for the current variation.
topics_data['not_included'][pm['variation']].append( '2012ApJ...750...10S' )
# Remove duplicates (e.g. from re-running this cell) while keeping the list
# order; list( set( ... ) ) would reorder the saved list arbitrarily.
topics_data['not_included'][pm['variation']] = list(
    dict.fromkeys( topics_data['not_included'][pm['variation']] )
)
topics_data.to_json( topics_fp )

In [110]:
# Print the conservative reading list, examining ranks up to twice the configured kernel size.
print_sorted_publications(
    sorted_cospsi,
    sorted_publications,
    pm['kernel_size'] * 2,
    a,
    do_not_show_included = True,
    do_not_show_not_included = True,
    central_publication = pm['variation'],
)

## Extensive Reading List
A reading list built separately for each and every one of the publications.

In [28]:
# For every publication of the variation, rank the whole atlas against it and
# collect the ranked publications into an atlas of their own.
a_pubs, sorted_cospsis, sorted_publications = [], [], []
for i, key_i in enumerate( pm['publications'] ):

    # New atlas created without loading saved data; seed it with the central publication.
    a_i = atlas.Atlas( atlas_dir, load_bibtex=False, load_atlas_data=False )
    a_i.data[key_i] = a[key_i]

    # Similarity ranking relative to this publication.
    sorted_cospsi_i, sorted_publications_i = sort_by_similarity( key_i, c )
    sorted_cospsis.append( sorted_cospsi_i )
    sorted_publications.append( sorted_publications_i )

    # Copy each ranked publication over, taking the Zotero entry when one
    # exists and falling back to the larger atlas otherwise.
    for key in sorted_publications_i:
        for source_data in ( a_zotero.data, a.data ):
            if key in source_data:
                a_i.data[key] = source_data[key]
                break

    a_i.prune_duplicates( preferred=list( a_zotero.data.keys() ) )

    a_pubs.append( a_i )

100%|██████████| 1794/1794 [00:00<00:00, 112724.43it/s]
100%|██████████| 16199/16199 [00:00<00:00, 109801.87it/s]


In [29]:
# Load the draft again — presumably to pick up edits made since the last load (TODO confirm).
pub_doc = tex.Tex( filepath=pm['tex_fp'] )

In [30]:
# Mark this publication as deliberately not included for the current variation.
topics_data['not_included'][pm['variation']].append( '2017MNRAS.466.3460V' )
# Remove duplicates while keeping the list order; without this, re-running
# the cell would write the same key to the JSON file again.
topics_data['not_included'][pm['variation']] = list(
    dict.fromkeys( topics_data['not_included'][pm['variation']] )
)
topics_data.to_json( topics_fp )

In [31]:
# Print one reading list per central publication, each drawn from the atlas
# assembled for that publication.
for i in range( len( a_pubs ) ):
    a_i = a_pubs[i]
    print_sorted_publications(
        sorted_cospsis[i],
        sorted_publications[i],
        pm['kernel_size'],
        a_i,
        do_not_show_included = True,
        do_not_show_not_included = True,
        central_publication = pm['publications'][i],
    )

0 -- Bizyaev2021
Spectral Observations of Superthin Galaxies
Bizyaev, Dmitry and Makarov, D. I. and Reshetnikov, V. P. and Mosenkov, A. V. and Kautsch, S. J. and Antipova, A. V.
Related rank: 0.Bizyaev2021
Read: UNREAD
Included: False
article

Comment: 28 pages, 14 figures, accepted by {ApJ} Comment: 28 pages, 14 figures, accepted by {ApJ} Comment: 28 pages, 14 figures, accepted by {ApJ} Comment: 28 pages, 14 figures, accepted by {ApJ} Comment: 28 pages, 14 figures, accepted by {ApJ} We conduct spectral observations of 138 superthin galaxies (STGs) with high radial-to-vertical stellar disk scale ratios with the Dual Imaging Spectrograph on the 3.5 m telescope at the Apache Point Observatory (APO) to obtain the ionized gas rotation curves with R ~5000 resolution. We also performed near-infrared (NIR) H and Ks photometry for 18 galaxies with the NICFPS camera on the 3.5 m telescope. The spectra, the NIR photometry, and published optical and NIR photometry are used for modeling that utili