# Link Prediction with node2vec (n2v)

In [17]:
# Install into the running kernel's environment (%pip rather than !pip) and pin
# the version recorded in this notebook's install log so re-runs are reproducible.
%pip install arxiv==1.4.2

Collecting arxiv
  Downloading arxiv-1.4.2-py3-none-any.whl (11 kB)
Collecting feedparser
  Downloading feedparser-6.0.8-py3-none-any.whl (81 kB)
[K     |████████████████████████████████| 81 kB 1.7 MB/s eta 0:00:01
[?25hCollecting sgmllib3k
  Downloading sgmllib3k-1.0.0.tar.gz (5.8 kB)
Building wheels for collected packages: sgmllib3k
  Building wheel for sgmllib3k (setup.py) ... [?25ldone
[?25h  Created wheel for sgmllib3k: filename=sgmllib3k-1.0.0-py3-none-any.whl size=6065 sha256=9628adb2c5f911b2608828809f9555a106d3a0b745a02b4cf5c0b995c871b39d
  Stored in directory: /Users/vatsalpatel/Library/Caches/pip/wheels/83/63/2f/117884c3b19d46b64d3d61690333aa80c88dc14050e269c546
Successfully built sgmllib3k
Installing collected packages: sgmllib3k, feedparser, arxiv
Successfully installed arxiv-1.4.2 feedparser-6.0.8 sgmllib3k-1.0.0


In [1]:
import networkx as nx
import scipy as sp
import pandas as pd
import numpy as np
import arxiv

from node2vec import node2vec as n2v

In [3]:
# constants
# Keywords to search on arXiv, one search per entry.
# NOTE(review): 'machinelearning' is a single token — confirm whether
# 'machine learning' was intended.
queries = [
    'automl', 'machinelearning', 'data', 'physics', 'mathematics',
    'recommendation system', 'nlp', 'neural networks'
]

# Import Data

In [18]:
def search_arxiv(queries, max_results = 10000):
    '''
    Search arXiv for each query and collect the matching papers in one DataFrame.

    One `arxiv.Search` is issued per query, sorted by submission date with the
    newest first, so each query contributes at most its `max_results` latest
    papers. Rows are appended in query order. A paper matching several queries
    appears once per matching query (no de-duplication is performed here).

    params:
        queries (List[str] or str) : Keywords you want to search on arXiv. A
                                     single string is treated as one query
                                     instead of being iterated character by
                                     character.
        max_results (Int) : The maximum number of results to fetch per query.
                            Default value is 10000. The original notes mention
                            a cap of 300000; no cap is enforced by this function.

    returns:
        A DataFrame with one row per search result and the columns
            `title`, `date`, `article_id`, `url`, `main_topic`, `all_topics`
        The columns are present even when nothing was found (e.g. `queries`
        is empty), so downstream column access keeps working.

    example:
        research_df = search_arxiv(
            queries = ['automl', 'recommender system', 'nlp', 'data science'],
            max_results = 10000
        )
    '''
    columns = ['title', 'date', 'article_id', 'url', 'main_topic', 'all_topics']

    # A bare string would otherwise be looped over one character at a time.
    if isinstance(queries, str):
        queries = [queries]

    records = []
    for query in queries:
        # progress indicator: shows which query is currently being fetched
        print(query)
        search = arxiv.Search(
            query = query,
            max_results = max_results,
            sort_by = arxiv.SortCriterion.SubmittedDate,
            sort_order = arxiv.SortOrder.Descending
        )
        # flatten every result into a plain dict so pandas can build the frame in one go
        for res in search.results():
            records.append({
                'title' : res.title,
                'date' : res.published,
                'article_id' : res.entry_id,
                'url' : res.pdf_url,
                'main_topic' : res.primary_category,
                'all_topics' : res.categories
            })

    # Passing `columns` keeps the schema stable even when `records` is empty.
    return pd.DataFrame(records, columns = columns)

In [None]:
%%time
# Run one arXiv search per keyword (up to 10000 results each) and keep the
# combined results in `research_df`.
# NOTE(review): this passes its own keyword list rather than the `queries`
# constant defined earlier in the notebook — confirm which one is intended.
research_df = search_arxiv(
    queries = ['automl', 'recommender system', 'nlp', 'data science'],
    max_results = 10000
)

Beginning Searches
query
query
query
query
Completed Searches
Building DataFrame


In [None]:
# (number of rows, number of columns) of the combined search results
research_df.shape

In [13]:
# Small exploratory search: at most 10 "automl" papers, newest submissions first.
search = arxiv.Search(
    query="automl",
    max_results=10,
    sort_by=arxiv.SortCriterion.SubmittedDate,
    sort_order=arxiv.SortOrder.Descending,
)

In [16]:
# Turn every search hit into a flat record; `d` is the list of those records.
d = []
for res in search.results():
    data = dict(
        title=res.title,
        date=res.published,
        article_id=res.entry_id,
        url=res.pdf_url,
        main_topic=res.primary_category,
        all_topics=res.categories,
    )
    d.append(data)

In [17]:
# Show the collected records as a table.
# NOTE(review): the saved output of this cell lists `primary_category` /
# `all_categories` columns, while the records in `d` use the keys
# `main_topic` / `all_topics` — the output looks stale; re-run to refresh it.
pd.DataFrame(d)

Unnamed: 0,title,date,article_id,url,primary_category,all_categories
0,Review of automated time series forecasting pi...,2022-02-03 17:26:27+00:00,http://arxiv.org/abs/2202.01712v1,http://arxiv.org/pdf/2202.01712v1,cs.LG,[cs.LG]
1,Hubble Asteroid Hunter: I. Identifying asteroi...,2022-02-01 06:56:20+00:00,http://arxiv.org/abs/2202.00246v1,http://arxiv.org/pdf/2202.00246v1,astro-ph.EP,"[astro-ph.EP, astro-ph.IM]"
2,NAS-Bench-Suite: NAS Evaluation is (Now) Surpr...,2022-01-31 18:02:09+00:00,http://arxiv.org/abs/2201.13396v1,http://arxiv.org/pdf/2201.13396v1,cs.LG,"[cs.LG, cs.AI, stat.ML]"
3,Online AutoML: An adaptive AutoML framework fo...,2022-01-24 15:37:20+00:00,http://arxiv.org/abs/2201.09750v1,http://arxiv.org/pdf/2201.09750v1,cs.LG,"[cs.LG, cs.AI]"
4,Automated Reinforcement Learning (AutoRL): A S...,2022-01-11 12:41:43+00:00,http://arxiv.org/abs/2201.03916v1,http://arxiv.org/pdf/2201.03916v1,cs.LG,[cs.LG]
5,Winning solutions and post-challenge analyses ...,2022-01-11 06:21:18+00:00,http://arxiv.org/abs/2201.03801v1,http://arxiv.org/pdf/2201.03801v1,cs.LG,"[cs.LG, cs.AI]"
6,TPAD: Identifying Effective Trajectory Predict...,2022-01-09 06:50:39+00:00,http://arxiv.org/abs/2201.02941v1,http://arxiv.org/pdf/2201.02941v1,cs.LG,"[cs.LG, I.6.7; I.5.1]"
7,Neural Architecture Search for Inversion,2022-01-05 17:45:39+00:00,http://arxiv.org/abs/2201.01772v1,http://arxiv.org/pdf/2201.01772v1,cs.LG,[cs.LG]
8,AutoDES: AutoML Pipeline Generation of Classif...,2022-01-01 15:17:07+00:00,http://arxiv.org/abs/2201.00207v1,http://arxiv.org/pdf/2201.00207v1,cs.LG,[cs.LG]
9,"MLOps -- Definitions, Tools and Challenges",2022-01-01 10:38:31+00:00,http://arxiv.org/abs/2201.00162v1,http://arxiv.org/pdf/2201.00162v1,cs.LG,"[cs.LG, cs.SE, I.2]"


In [14]:

# Dump each hit as a short text block: title, date, id, PDF link, then categories.
for result in search.results():
    print(
        'Title: ', result.title,
        '\nDate: ', result.published,
        '\nId: ', result.entry_id,
        '\nURL: ', result.pdf_url,
        '\n\n', result.primary_category, result.categories,
    )

Title:  Review of automated time series forecasting pipelines 
Date:  2022-02-03 17:26:27+00:00 
Id:  http://arxiv.org/abs/2202.01712v1 
URL:  http://arxiv.org/pdf/2202.01712v1 

 cs.LG ['cs.LG']
Title:  Hubble Asteroid Hunter: I. Identifying asteroid trails in Hubble Space Telescope images 
Date:  2022-02-01 06:56:20+00:00 
Id:  http://arxiv.org/abs/2202.00246v1 
URL:  http://arxiv.org/pdf/2202.00246v1 

 astro-ph.EP ['astro-ph.EP', 'astro-ph.IM']
Title:  NAS-Bench-Suite: NAS Evaluation is (Now) Surprisingly Easy 
Date:  2022-01-31 18:02:09+00:00 
Id:  http://arxiv.org/abs/2201.13396v1 
URL:  http://arxiv.org/pdf/2201.13396v1 

 cs.LG ['cs.LG', 'cs.AI', 'stat.ML']
Title:  Online AutoML: An adaptive AutoML framework for online learning 
Date:  2022-01-24 15:37:20+00:00 
Id:  http://arxiv.org/abs/2201.09750v1 
URL:  http://arxiv.org/pdf/2201.09750v1 

 cs.LG ['cs.LG', 'cs.AI']
Title:  Automated Reinforcement Learning (AutoRL): A Survey and Open Problems 
Date:  2022-01-11 12:41:43+00:00

## Generate Network