# INSPIRE-HEP PostDoc position search
* Use python to do web scraping
* See [INSPIRE API](http://inspirehep.net/info/hep/api)
* Example codes:
  * [inspire_api_author_citations.py](http://inspirehep.net/info/hep/tools/inspire_api_author_citations.py)
  * [Spires.py](http://www.stringwiki.org/wiki/Spires.py)
  * [Listcitations.py](https://www.stringwiki.org/wiki/Listcitations.py)
  * [pyinspire.py](https://bitbucket.org/ihuston/pyinspire/src/8802a566f5427f44c004ad93ec000755a9d53f96/pyinspire/pyinspire.py?at=master&fileviewer=file-view-default)

### Import packages

In [23]:
# from urllib.request import urlopen, Request
import requests
from bs4 import BeautifulSoup
import re # regular expension
import pandas as pd
import numpy as np
from sqlalchemy import create_engine

### Search fields

In [24]:
# Allowed values for the `rank` search term.
Rank = [
    'Senior',
    'Junior',
    'Postdoc',
    'Student',
    'Visiting Scientist',
    'Staff',
]

# Allowed values for the `region` search term.
Region = [
    'Africa',
    'Asia',
    'Australasia',
    'Europe',
    'Middle East',
    'North America',
    'South America',
]

# Allowed values for the `subject` (field) search term.
Field = [
    'astro-ph',
    'cond-mat',
    'cs',
    'gr-qc',
    'hep-ex',
    'hep-lat',
    'hep-ph',
    'hep-th',
    'math',
    'math-ph',
    'nucl-ex',
    'nucl-th',
    'physics',
    'physics.acc-phys',
    'physics.ins-det',
    'physics-other',
    'quant-ph',
]

### Get search URL
* URL format:
  * http://inspirehep.net/search?p1=rank:%22postdoc%22+region:%22north+america%22+subject:%22hep-ex%22+&op1=a&p2=&action_search=Search&cc=Jobs
  * http://inspirehep.net/search?cc=Jobs&ln=en&p1=rank:%22postdoc%22+region:%22north+america%22+subject:%22hep-ex%22&rg=250&op1=a
  * http://inspirehep.net/search?cc=Jobs&ln=en&p1=rank%3A%22postdoc%22+region%3A%22north+america%22+subject%3A%22hep-ex%22&jrec=51&op1=a
  * http://inspirehep.net/search?cc=Jobs&ln=en&p1=rank%3A%22postdoc%22+region%3A%22north+america%22+subject%3A%22hep-ex%22&jrec=76&op1=a


In [25]:
def inspire_url(rank='Postdoc', region='North America', field='hep-ex'):
    """Build the INSPIRE-HEP job-search URL for one rank/region/field query.

    Parameters
    ----------
    rank : str
        One of the entries of the module-level ``Rank`` list.
    region : str
        One of the entries of the module-level ``Region`` list.
    field : str
        One of the entries of the module-level ``Field`` list.

    Returns
    -------
    str
        Search URL on the ``Jobs`` collection. Each value is lower-cased and
        its words are joined with '+' (e.g. 'North America' ->
        'north+america'). The URL carries ``rg=250`` (presumably the number
        of records per page -- confirm against the INSPIRE API docs).

    Raises
    ------
    ValueError
        If an argument is not in its list of allowed values. Previously this
        was only printed and a URL was still built from the invalid value.
    """
    for value, allowed in ((rank, Rank), (region, Region), (field, Field)):
        if value not in allowed:
            raise ValueError('Error: {} must be in {}'.format(value, allowed))

    def _query_term(text):
        # Lower-case, then turn "word1 word2" into "word1+word2".
        return '+'.join(text.lower().split())

    return (
        'http://inspirehep.net/search?cc=Jobs&ln=en&'
        'p1=rank:%22{}%22+region:%22{}%22+subject:%22{}%22&rg=250&op1=a'
    ).format(_query_term(rank), _query_term(region), _query_term(field))

### Get HTML page information

In [26]:
# Search URL for the default query
# (rank='Postdoc', region='North America', field='hep-ex').
url = inspire_url()
# print(url)

def get_HTML(url, timeout=30):
    """Download *url* and parse the response body with BeautifulSoup.

    Parameters
    ----------
    url : str
        Address of the page to fetch.
    timeout : float, optional
        Seconds to wait for the server before giving up. Without a timeout
        ``requests.get`` can block indefinitely.

    Returns
    -------
    BeautifulSoup
        The page parsed with the built-in ``'html.parser'``.

    Raises
    ------
    requests.exceptions.RequestException
        On a connection failure, a timeout, or an HTTP error status.
    """
    r = requests.get(url, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of silently parsing an error page.
    r.raise_for_status()
    return BeautifulSoup(r.text, 'html.parser')

### List of jobs

In [27]:
def get_job_blocks(soup):
    """Return every ``<div class="record_body">`` element found in *soup*."""
    return soup.find_all('div', class_='record_body')

def get_dates(soup):
    """Extract the posting date and the deadline from one job block.

    The texts of the ``<strong>`` tags in *soup* are read in order:

    * the first one is the posting date, with any leading/trailing ':'
      stripped;
    * the second one, when present, holds the deadline: its second
      whitespace-separated token with ']' stripped. The token 'OPEN' is
      expanded to 'OPEN UNTIL FILLED'.

    Returns
    -------
    tuple of (str, str)
        ``(post_date, deadline)``. Either value is '' when the matching tag
        is missing, has no single text node (``.string`` is None), or -- for
        the deadline -- has fewer than two tokens.
    """
    # A tag with nested markup has .string == None; treat it as empty text.
    texts = [tag.string or '' for tag in soup.find_all('strong')]

    post_date = texts[0].strip(':') if texts else ''

    deadline = ''
    if len(texts) > 1:
        tokens = texts[1].split()
        if len(tokens) > 1:
            deadline = tokens[1].strip(']')
        if deadline == 'OPEN':
            deadline = 'OPEN UNTIL FILLED'

    return post_date, deadline

# def get_post_dates(soup):
#     dates = soup.find('strong')
#     print(dates)
#     return dates

# def get_deadline(soup):
#     deadline = soup.find(text=re.compile('Deadline:'))
#     print(deadline)

def get_institution(soup):
    """Return all ``<a>`` tags in *soup* whose href matches 'cc=Institutions'."""
    institution_href = re.compile('cc=Institutions')
    return soup.find_all('a', href=institution_href)

def get_experiments(soup):
    """Return all ``<a>`` tags in *soup* whose href matches 'cc=Experiments'."""
    experiment_href = re.compile('cc=Experiments')
    return soup.find_all('a', href=experiment_href)

def get_position(soup):
    """Return the text of the first ``<span>`` whose style matches 'transform'.

    Raises IndexError when *soup* contains no such span.
    """
    transform_spans = soup.find_all('span', style=re.compile('transform'))
    first_span = transform_spans[0]
    return first_span.string
    
def get_post_topic(soup):
    """Return ``(text, href)`` of the first record link in *soup*.

    A record link is an ``<a>`` tag whose href matches
    'http://inspirehep.net/record/'. Raises IndexError when there is none.
    """
    record_links = soup.find_all('a', href=re.compile('http://inspirehep.net/record/'))
    first_link = record_links[0]
    return first_link.string, first_link.get('href')

In [28]:
# Download the search results page and cut it into its record blocks.
soup = get_HTML(url)
div_tages = get_job_blocks(soup)

# One pandas Series per record, in the order
# post date, deadline, institution, experiments, position, topic, link.
list_of_series = []
for div in div_tages:
    post_date, deadline = get_dates(div)
    institutions = get_institution(div)
    experiments = get_experiments(div)

    # All experiment names of a record go into one comma-separated string.
    list_exp = [link.string for link in experiments]
    exp = ', '.join(list_exp)

    position = get_position(div)
    topic, topic_link = get_post_topic(div)

    # Only the first institution link of the record is kept.
    s = pd.Series([
        post_date,
        deadline,
        institutions[0].string,
        exp,
        position,
        topic,
        topic_link,
    ])
    list_of_series.append(s)

### Convert into DataFrame

In [29]:
# With axis=1 every Series becomes a column, so transpose to get one row per
# record, then label the seven fields.
column_names = ['Post Date', 'Deadline', 'Institution', 'Experiments', 'Position', 'Topic', 'Link']
df = pd.concat(list_of_series, axis=1).T
df.columns = column_names
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 104 entries, 0 to 103
Data columns (total 7 columns):
Post Date      104 non-null object
Deadline       104 non-null object
Institution    104 non-null object
Experiments    104 non-null object
Position       104 non-null object
Topic          104 non-null object
Link           104 non-null object
dtypes: object(7)
memory usage: 5.8+ KB


### Use datetime64 format
* Only 'Post Date' is converted to datetime64, because 'Deadline' still contains non-date values such as OPEN UNTIL FILLED
* Use 'Post Date' as row index

In [30]:
# Parse the posting dates into datetime64 values.
df['Post Date'] = pd.to_datetime(df['Post Date'])
# 'Deadline' is left as strings for now: it still holds non-date values
# ('OPEN UNTIL FILLED' and '').
# df['Deadline'] = pd.to_datetime(df['Deadline'])
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 104 entries, 0 to 103
Data columns (total 7 columns):
Post Date      104 non-null datetime64[ns]
Deadline       104 non-null object
Institution    104 non-null object
Experiments    104 non-null object
Position       104 non-null object
Topic          104 non-null object
Link           104 non-null object
dtypes: datetime64[ns](1), object(6)
memory usage: 5.8+ KB


In [31]:
# Use the posting date as the row index (modifies df in place).
df.set_index('Post Date', inplace=True)
df

Unnamed: 0_level_0,Deadline,Institution,Experiments,Position,Topic,Link
Post Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2018-10-22,2018-12-31,South Dakota U.,"LEGEND, DUNE, MINER, ORNL-SNS-COHERENT",Postdoc,Experimental Neutrino Physics (NFE00389P),http://inspirehep.net/record/1699522
2018-10-19,2018-12-31,Brookhaven Natl. Lab.,DUNE,Postdoc,Experimental High Energy Physics (1484),http://inspirehep.net/record/1699335
2018-10-18,2018-11-25,Cincinnati U.,CERN-LHC-LHCb,Postdoc,Experimental High Energy Physics (34341),http://inspirehep.net/record/1699112
2018-10-12,2018-11-01,Alabama U.,LZ,Postdoc,LZ Dark Matter Search Experiment (0811381),http://inspirehep.net/record/1698207
2018-10-11,2018-11-30,"Queen's U., Kingston",NEWS-G,Postdoc,Postdoctoral Research Associate,http://inspirehep.net/record/1697657
2018-10-09,2018-10-31,Oak Ridge,,Postdoc,Neutrino physics (NB50690608),http://inspirehep.net/record/1697453
2018-10-09,2018-11-01,Yale U.,,Postdoc,Yale Mossman Fellowship (AJO-12228),http://inspirehep.net/record/1697270
2018-10-05,2018-11-15,UCLA,ICECUBE,Postdoc,Particle Astrophysics,http://inspirehep.net/record/1696937
2018-10-04,2018-12-16,"Queen's U., Kingston","MAJORANA, LEGEND",Postdoc,Experimental Particle Astrophysics and Ge Dete...,http://inspirehep.net/record/1696890
2018-10-04,2018-11-09,SLAC,CERN-LHC-ATLAS,Postdoc,Experimental High Energy Physics (AJO-12240),http://inspirehep.net/record/1696790


In [32]:
# 'Deadline' still holds plain strings here, so this is a string sort;
# rows with an empty deadline come first.
df.sort_values(by=['Deadline'])

Unnamed: 0_level_0,Deadline,Institution,Experiments,Position,Topic,Link
Post Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2018-02-09,,Caltech,"FNAL-E-0929, DUNE",Postdoc,Experimental High Energy Physics (AJO-10850),http://inspirehep.net/record/1654235
2018-06-08,,North Carolina U.,"MAJORANA, KATRIN, LEGEND",Postdoc,Experimental Nuclear and Astroparticle Physics...,http://inspirehep.net/record/1676807
2017-09-07,2018-03-31,Argonne (main),,Postdoc,"Appointee, Electrical Engineering",http://inspirehep.net/record/1621882
2018-04-06,2018-05-15,"Queen's U., Kingston",PICO,Postdoc,Experimental Dark Matter Postdoc,http://inspirehep.net/record/1666148
2018-05-02,2018-06-01,"IIT, Chicago","PROSPECT, DAYA-BAY, FNAL-E-0974, SBND, DUNE",Postdoc,Neutrino Physics,http://inspirehep.net/record/1671065
2018-04-10,2018-06-10,Fermi National Accelerator Laboratory,,Postdoc,Theoretical Physics and Scientific Computing (...,http://inspirehep.net/record/1667022
2018-01-28,2018-06-15,Hawaii U.,modular RICH and Timing Vertex Detector (initi...,Postdoc,System on Chip Application Specific Integrated...,http://inspirehep.net/record/1650518
2018-01-28,2018-06-15,Hawaii U.,Next Generation Discovery Detectors,Postdoc,Detector Development Research Fellowship,http://inspirehep.net/record/1650517
2018-03-22,2018-06-26,"Cornell U., CLASSE",,Postdoc,Accelerator Physics (AJO-10987),http://inspirehep.net/record/1663521
2018-06-21,2018-07-23,"LBNL, NSD",CERN-LHC-ALICE,Postdoc,Experimental Particle Physics (85241),http://inspirehep.net/record/1678812


### Separate the original DataFrame into 3 sub-DataFrames
* df_open_until_filled contains the records with OPEN UNTIL FILLED in the 'Deadline' column
* df_empty_deadline contains the records without deadline information
* df_new contains the remaining records

In [33]:
# Boolean masks for the two kinds of non-date deadline.
is_open_until_filled = df['Deadline'] == 'OPEN UNTIL FILLED'
is_empty_deadline = df['Deadline'] == ''

# .copy() makes each subset an independent frame rather than a view of df,
# so columns can be added to it later.
df_open_until_filled = df[is_open_until_filled].copy()
df_empty_deadline = df[is_empty_deadline].copy()

# Everything else has a real date as its deadline.
df_new = df[~(is_open_until_filled | is_empty_deadline)].copy()
df_new

Unnamed: 0_level_0,Deadline,Institution,Experiments,Position,Topic,Link
Post Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2018-10-22,2018-12-31,South Dakota U.,"LEGEND, DUNE, MINER, ORNL-SNS-COHERENT",Postdoc,Experimental Neutrino Physics (NFE00389P),http://inspirehep.net/record/1699522
2018-10-19,2018-12-31,Brookhaven Natl. Lab.,DUNE,Postdoc,Experimental High Energy Physics (1484),http://inspirehep.net/record/1699335
2018-10-18,2018-11-25,Cincinnati U.,CERN-LHC-LHCb,Postdoc,Experimental High Energy Physics (34341),http://inspirehep.net/record/1699112
2018-10-12,2018-11-01,Alabama U.,LZ,Postdoc,LZ Dark Matter Search Experiment (0811381),http://inspirehep.net/record/1698207
2018-10-11,2018-11-30,"Queen's U., Kingston",NEWS-G,Postdoc,Postdoctoral Research Associate,http://inspirehep.net/record/1697657
2018-10-09,2018-10-31,Oak Ridge,,Postdoc,Neutrino physics (NB50690608),http://inspirehep.net/record/1697453
2018-10-09,2018-11-01,Yale U.,,Postdoc,Yale Mossman Fellowship (AJO-12228),http://inspirehep.net/record/1697270
2018-10-05,2018-11-15,UCLA,ICECUBE,Postdoc,Particle Astrophysics,http://inspirehep.net/record/1696937
2018-10-04,2018-12-16,"Queen's U., Kingston","MAJORANA, LEGEND",Postdoc,Experimental Particle Astrophysics and Ge Dete...,http://inspirehep.net/record/1696890
2018-10-04,2018-11-09,SLAC,CERN-LHC-ATLAS,Postdoc,Experimental High Energy Physics (AJO-12240),http://inspirehep.net/record/1696790


### Convert Deadline to datetime64 format in df_new

In [34]:
# Parsing is possible now: df_new excludes the 'OPEN UNTIL FILLED' and
# empty deadlines.
df_new['Deadline'] = pd.to_datetime(df_new['Deadline'])
df_new.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 99 entries, 2018-10-22 to 2015-06-10
Data columns (total 6 columns):
Deadline       99 non-null datetime64[ns]
Institution    99 non-null object
Experiments    99 non-null object
Position       99 non-null object
Topic          99 non-null object
Link           99 non-null object
dtypes: datetime64[ns](1), object(5)
memory usage: 5.4+ KB


### Select date after today

In [35]:
# Current calendar date (time of day dropped) as a numpy datetime64, for
# comparison with the 'Deadline' column.
today = np.datetime64(pd.Timestamp("today").date())

# Keep the postings whose deadline is strictly later than today, soonest first.
deadline_ahead = df_new['Deadline'] > today
df_new[deadline_ahead].sort_values(by=['Deadline'])

Unnamed: 0_level_0,Deadline,Institution,Experiments,Position,Topic,Link
Post Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2018-05-05,2018-10-24,Arizona U.,CERN-LHC-ATLAS,Postdoc,Experimental Particle Physics (P20553),http://inspirehep.net/record/1671770
2018-08-27,2018-10-30,Boston U.,CERN-LHC-CMS,Postdoc,Experimental High Energy Physics,http://inspirehep.net/record/1691567
2018-02-26,2018-10-31,Northwestern U.,"CERN-LHC-CMS, FNAL-E-0973",Postdoc,Experimental Particle Physics,http://inspirehep.net/record/1657296
2018-04-09,2018-10-31,Penn State U.,"LUX, LZ",Postdoc,Dark Matter Direct Detection Experiments (77950),http://inspirehep.net/record/1666771
2018-09-19,2018-10-31,"UC, Berkeley",CERN-LHC-ATLAS,Postdoc,Experimental Particle Physics,http://inspirehep.net/record/1694661
2018-09-13,2018-10-31,SLAC,,Postdoc,"Machine Learning for LHC, Neutrino, and Cosmol...",http://inspirehep.net/record/1693769
2018-09-06,2018-10-31,Southern Methodist U.,CERN-LHC-ATLAS,Postdoc,Experimental High Energy Physics (AJO-11851),http://inspirehep.net/record/1692898
2018-10-09,2018-10-31,Oak Ridge,,Postdoc,Neutrino physics (NB50690608),http://inspirehep.net/record/1697453
2018-08-28,2018-10-31,McGill U.,nEXO,Postdoc,nEXO Detector Development (AJO-11433),http://inspirehep.net/record/1691714
2018-09-06,2018-11-01,"UC, Davis",DARKSIDE,Postdoc,Experimental Astroparticle Physics,http://inspirehep.net/record/1692758


In [36]:
### Get contacts and emails information from another page

In [37]:
def get_contacts(url, timeout=30):
    """Fetch the page at *url* and return its contact names.

    Contacts are the texts of the ``<a>`` tags whose href matches
    'cc=HepNames'. Links without a single text node (``.string`` is None)
    are skipped; previously one such link made the join raise TypeError.

    Parameters
    ----------
    url : str
        Address of the job record page.
    timeout : float, optional
        Seconds to wait for the server; without it the request can block
        indefinitely.

    Returns
    -------
    str
        The names joined with ', ' ('' when none are found).
    """
    r = requests.get(url, timeout=timeout)
    soup = BeautifulSoup(r.text, 'html.parser')
    hepnames_links = soup.find_all('a', href=re.compile('cc=HepNames'))
    contacts = [a.string for a in hepnames_links if a.string is not None]
    return ', '.join(contacts)
    
def get_emails(url, timeout=30):
    """Fetch the page at *url* and return the e-mail addresses shown on it.

    The texts of all ``<a>`` tags whose href matches 'mailto' are collected,
    except links with no single text node (``.string`` is None) and links
    whose text contains 'Click here' or 'feedback@inspirehep.net'.

    Parameters
    ----------
    url : str
        Address of the job record page.
    timeout : float, optional
        Seconds to wait for the server; without it the request can block
        indefinitely.

    Returns
    -------
    str
        The unique addresses joined with ', ', in order of first appearance
        ('' when none are found).
    """
    r = requests.get(url, timeout=timeout)
    soup = BeautifulSoup(r.text, 'html.parser')
    mails = []
    for link in soup.find_all('a', href=re.compile('mailto')):
        text = link.string
        if text is None:
            continue
        if 'Click here' in text or 'feedback@inspirehep.net' in text:
            continue
        mails.append(text)
    # dict.fromkeys drops duplicates but keeps first-seen order; set() gave
    # an order that could change from one run to the next.
    return ', '.join(dict.fromkeys(mails))
    
# get_emails('http://inspirehep.net/record/1687795') # this contains NoneType

def get_contacts_and_emails(url, timeout=30):
    """Fetch the page at *url* once and return its contacts and e-mails.

    Contacts are the texts of the ``<a>`` tags whose href matches
    'cc=HepNames'. E-mails are the texts of the ``<a>`` tags whose href
    matches 'mailto', except those containing 'Click here' or
    'feedback@inspirehep.net'. Links without a single text node
    (``.string`` is None) are skipped in both lists; for contacts this
    previously made the join raise TypeError.

    Parameters
    ----------
    url : str
        Address of the job record page.
    timeout : float, optional
        Seconds to wait for the server; without it the request can block
        indefinitely.

    Returns
    -------
    tuple of (str, str)
        ``(contacts, mails)``, each joined with ', '. E-mails are unique and
        listed in order of first appearance.
    """
    r = requests.get(url, timeout=timeout)
    soup = BeautifulSoup(r.text, 'html.parser')

    hepnames_links = soup.find_all('a', href=re.compile('cc=HepNames'))
    contacts = ', '.join(a.string for a in hepnames_links if a.string is not None)

    mails = []
    for link in soup.find_all('a', href=re.compile('mailto')):
        text = link.string
        if text is None:
            continue
        if 'Click here' in text or 'feedback@inspirehep.net' in text:
            continue
        mails.append(text)
    # dict.fromkeys drops duplicates but keeps first-seen order; set() gave
    # an order that could change from one run to the next.
    mails = ', '.join(dict.fromkeys(mails))

    return contacts, mails

In [38]:
# Download each record page once and take both fields from it; calling
# get_contacts and get_emails separately fetched every page twice.
contact_info = [get_contacts_and_emails(link) for link in df_new['Link']]
df_new['Contacts'] = [contacts for contacts, _ in contact_info]
df_new['Emails'] = [mails for _, mails in contact_info]

In [39]:
# Postings whose deadline is later than today, soonest first -- now
# including the Contacts and Emails columns.
# df_new.sort_values(by=['Deadline'])
df_new[df_new['Deadline'] > today].sort_values(by=['Deadline'])

Unnamed: 0_level_0,Deadline,Institution,Experiments,Position,Topic,Link,Contacts,Emails
Post Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2018-05-05,2018-10-24,Arizona U.,CERN-LHC-ATLAS,Postdoc,Experimental Particle Physics (P20553),http://inspirehep.net/record/1671770,Erich Varnes,varnes@physics.arizona.edu
2018-08-27,2018-10-30,Boston U.,CERN-LHC-CMS,Postdoc,Experimental High Energy Physics,http://inspirehep.net/record/1691567,Zeynep Demiragli,zdemirag@bu.edu
2018-02-26,2018-10-31,Northwestern U.,"CERN-LHC-CMS, FNAL-E-0973",Postdoc,Experimental Particle Physics,http://inspirehep.net/record/1657296,Mayda M. Velasco,"nuparticlepostdoc@northwestern.edu, m-velasco@..."
2018-04-09,2018-10-31,Penn State U.,"LUX, LZ",Postdoc,Dark Matter Direct Detection Experiments (77950),http://inspirehep.net/record/1666771,Carmen Carmona,"carmona@psu.edu, physicsapply@psu.edu"
2018-09-19,2018-10-31,"UC, Berkeley",CERN-LHC-ATLAS,Postdoc,Experimental Particle Physics,http://inspirehep.net/record/1694661,Heather Gray,heather.gray@berkeley.edu
2018-09-13,2018-10-31,SLAC,,Postdoc,"Machine Learning for LHC, Neutrino, and Cosmol...",http://inspirehep.net/record/1693769,Kazuhiro Terao,"kterao@SLAC.Stanford.EDU, pjm@slac.stanford.ed..."
2018-09-06,2018-10-31,Southern Methodist U.,CERN-LHC-ATLAS,Postdoc,Experimental High Energy Physics (AJO-11851),http://inspirehep.net/record/1692898,Ryszard Stroynowski,ryszard@physics.smu.edu
2018-10-09,2018-10-31,Oak Ridge,,Postdoc,Neutrino physics (NB50690608),http://inspirehep.net/record/1697453,,
2018-08-28,2018-10-31,McGill U.,nEXO,Postdoc,nEXO Detector Development (AJO-11433),http://inspirehep.net/record/1691714,Thomas Brunner,neutrino@physics.mcgill.ca
2018-09-06,2018-11-01,"UC, Davis",DARKSIDE,Postdoc,Experimental Astroparticle Physics,http://inspirehep.net/record/1692758,Emilija Pantic,pantic@ucdavis.edu


### Deal with the other two DataFrames
* Add contacts and emails information in df_open_until_filled and df_empty_deadline

In [40]:
# Download each record page once and take both fields from it; calling
# get_contacts and get_emails separately fetched every page twice.
open_contact_info = [get_contacts_and_emails(link) for link in df_open_until_filled['Link']]
df_open_until_filled['Contacts'] = [contacts for contacts, _ in open_contact_info]
df_open_until_filled['Emails'] = [mails for _, mails in open_contact_info]

df_open_until_filled

Unnamed: 0_level_0,Deadline,Institution,Experiments,Position,Topic,Link,Contacts,Emails
Post Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2018-06-22,OPEN UNTIL FILLED,"UC, Santa Barbara","CERN-LHC-CMS, milliQan",Postdoc,Experimental Particle Physics,http://inspirehep.net/record/1679060,Claudio Campagnari,claudio@physics.ucsb.edu
2017-08-29,OPEN UNTIL FILLED,"Texas Tech. U., Lubbock",CERN-LHC-CMS,Postdoc,Experimental High Energy Physics,http://inspirehep.net/record/1620415,Sung-Won Lee,sungwon.lee@ttu.edu
2016-06-08,OPEN UNTIL FILLED,"U. Alabama, Tuscaloosa","EXO-200, nEXO",Postdoc,Nuclear and Particle Physics (0810514),http://inspirehep.net/record/1468124,Michele Kijeski,"andreas@ua.edu, mykijeski@ua.edu"


In [41]:
# Download each record page once and take both fields from it; calling
# get_contacts and get_emails separately fetched every page twice.
undated_contact_info = [get_contacts_and_emails(link) for link in df_empty_deadline['Link']]
df_empty_deadline['Contacts'] = [contacts for contacts, _ in undated_contact_info]
df_empty_deadline['Emails'] = [mails for _, mails in undated_contact_info]

df_empty_deadline

Unnamed: 0_level_0,Deadline,Institution,Experiments,Position,Topic,Link,Contacts,Emails
Post Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2018-06-08,,North Carolina U.,"MAJORANA, KATRIN, LEGEND",Postdoc,Experimental Nuclear and Astroparticle Physics...,http://inspirehep.net/record/1676807,John F Wilkerson,"jfw@unc.edu, jeanniec@unc.edu"
2018-02-09,,Caltech,"FNAL-E-0929, DUNE",Postdoc,Experimental High Energy Physics (AJO-10850),http://inspirehep.net/record/1654235,Ryan Patterson,rbpatter@caltech.edu


### Merge all DataFrames
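* After merging, the 'Deadline' column mixes timestamps (from df_new) with strings ('OPEN UNTIL FILLED' and ''), so the merged table can no longer be sorted by deadline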

In [42]:
# Stack the three subsets back into one table. 'Deadline' ends up holding
# mixed values: timestamps from df_new and strings from the other two frames.
df_all = pd.concat([df_new, df_open_until_filled, df_empty_deadline], sort=False)
df_all

Unnamed: 0_level_0,Deadline,Institution,Experiments,Position,Topic,Link,Contacts,Emails
Post Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2018-10-22,2018-12-31 00:00:00,South Dakota U.,"LEGEND, DUNE, MINER, ORNL-SNS-COHERENT",Postdoc,Experimental Neutrino Physics (NFE00389P),http://inspirehep.net/record/1699522,Jing Liu,jing.liu@usd.edu
2018-10-19,2018-12-31 00:00:00,Brookhaven Natl. Lab.,DUNE,Postdoc,Experimental High Energy Physics (1484),http://inspirehep.net/record/1699335,Steven Kettel,kettel@bnl.gov
2018-10-18,2018-11-25 00:00:00,Cincinnati U.,CERN-LHC-LHCb,Postdoc,Experimental High Energy Physics (34341),http://inspirehep.net/record/1699112,Michael Sokoloff,mike.sokoloff@uc.edu
2018-10-12,2018-11-01 00:00:00,Alabama U.,LZ,Postdoc,LZ Dark Matter Search Experiment (0811381),http://inspirehep.net/record/1698207,Jerry Busenitz,busenitz@ua.edu
2018-10-11,2018-11-30 00:00:00,"Queen's U., Kingston",NEWS-G,Postdoc,Postdoctoral Research Associate,http://inspirehep.net/record/1697657,"Gilles Gerbier, Julie Mc Donald","gg45@queensu.ca, jmm27@queensu.ca"
2018-10-09,2018-10-31 00:00:00,Oak Ridge,,Postdoc,Neutrino physics (NB50690608),http://inspirehep.net/record/1697453,,
2018-10-09,2018-11-01 00:00:00,Yale U.,,Postdoc,Yale Mossman Fellowship (AJO-12228),http://inspirehep.net/record/1697270,Witold Skiba,witold.skiba@yale.edu
2018-10-05,2018-11-15 00:00:00,UCLA,ICECUBE,Postdoc,Particle Astrophysics,http://inspirehep.net/record/1696937,Nathan Whitehorn,"i3postdocsearch@physics.ucla.edu, nwhitehorn@p..."
2018-10-04,2018-12-16 00:00:00,"Queen's U., Kingston","MAJORANA, LEGEND",Postdoc,Experimental Particle Astrophysics and Ge Dete...,http://inspirehep.net/record/1696890,Ryan Martin,ryan.martin@queensu.ca
2018-10-04,2018-11-09 00:00:00,SLAC,CERN-LHC-ATLAS,Postdoc,Experimental High Energy Physics (AJO-12240),http://inspirehep.net/record/1696790,Michael Kagan,makagan@slac.stanford.edu


### Convert into CSV

In [43]:
# Write the postings that have a dated deadline (df_new) to PostDoc.csv.
# NOTE(review): df_all, which also holds the open-until-filled and undated
# postings, is not exported -- confirm this is intended.
df_new.to_csv('PostDoc.csv')

### Convert into SQL
* sqlite

In [44]:
# Store the table in a local SQLite file.
engine = create_engine('sqlite:///PostDoc.sqlite', echo=False)
# if_exists='replace': with the default ('fail') re-running this cell raises
# ValueError as soon as the 'PostDoc' table already exists.
# NOTE(review): this writes the raw `df` (no Contacts/Emails columns), whereas
# the CSV export uses df_new -- confirm which frame is intended.
df.to_sql('PostDoc', con=engine, if_exists='replace')