# Basic single-element properties

* Atomic radii, electronic configurations, electronegativities
* Melting temperature, and the phase diagram (or at least the phase at the conditions of interest)

**Scrape from Wikipedia data pages**

In [1]:
import os
import pickle
import requests
from bs4 import BeautifulSoup
import numpy as np
import pandas as pd

In [2]:
# Directories (relative to this notebook) for raw and reference data
data_raw, data_ref = '../data/raw', '../data/reference'

In [3]:
# Wikipedia data pages to scrape, keyed by a short property name.
# The key becomes the prefix of the table names used further down.

urls = dict([
    ('radius', 'https://en.wikipedia.org/wiki/Atomic_radii_of_the_elements_(data_page)'),
    ('elneg', 'https://en.wikipedia.org/wiki/Electronegativities_of_the_elements_(data_page)'),
    ('electron', 'https://en.wikipedia.org/wiki/Electron_configurations_of_the_elements_(data_page)'),
    ('ionization_e', 'https://en.wikipedia.org/wiki/Ionization_energies_of_the_elements_(data_page)'),
    ('T_m', 'https://en.wikipedia.org/wiki/Melting_points_of_the_elements_(data_page)'),
    ('C_p', 'https://en.wikipedia.org/wiki/Heat_capacities_of_the_elements_(data_page)'),
])

In [4]:
# Scrape data from the tables on the wiki data pages listed in `urls`.
#
# Every <table class="wikitable"> on a page is parsed into a list of rows
# (one list of cell texts per <tr> that contains <td> cells).
#   * Regular tables (all rows the same width, header width matching) are
#     stored as DataFrames in `dfs`.
#   * Everything else (ragged rows, empty tables, header/row width mismatch)
#     is stored as an ndarray in `raw_data`.
# Keys in both dicts are '<name><table index on the page>', e.g. 'radius0'.

dfs = {}
raw_data = {}
for name, url in urls.items():
    # skip entries whose URL was left empty
    if len(url) == 0:
        continue

    # Bound the wait and fail loudly on HTTP errors instead of silently
    # parsing an error page that contains no tables.
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    page = response.text
    soup = BeautifulSoup(page, 'lxml')
    tables = soup.find_all('table', {'class': 'wikitable'})
    for i, table in enumerate(tables):
        # header: the <th> cells of the first row (if the table has any row)
        first_row = table.find('tr')
        header = first_row.find_all('th') if first_row is not None else []
        columns = [th.text.rstrip('\n') for th in header]

        # body: keep only rows that actually contain <td> cells
        data = []
        for tr in table.find_all('tr'):
            row = [c.text.rstrip('\n') for c in tr.find_all('td')]
            if len(row) > 0:
                data.append(row)

        # NumPy >= 1.24 raises ValueError when asked to build an array from
        # ragged rows unless dtype=object is given explicitly; with it, the
        # result is a 1-D object array holding one list per row.
        row_widths = {len(row) for row in data}
        if len(row_widths) <= 1:
            data = np.array(data)
        else:
            data = np.array(data, dtype=object)

        # make dataframe only when the table is rectangular AND the header
        # has one label per column; otherwise keep the raw array
        if data.ndim == 2 and len(columns) == data.shape[1]:
            dfs[name+str(i)] = pd.DataFrame(data, columns=columns)
        else:
            raw_data[name+str(i)] = data

In [5]:
# First wikitable (index 0) found on the atomic-radii page.
df_rad = dfs['radius0']
# Show only the rows whose 'Metallic' entry contains at least one digit.
# The pattern is a raw string: '\d' in a normal literal is an invalid escape
# sequence and triggers a warning on recent Python versions.
df_rad.loc[df_rad['Metallic'].str.contains(r'\d+', regex=True)]

Unnamed: 0,atomic number,symbol,name,empirical †,Calculated,van der Waals,Covalent (single bond),Covalent (triple bond),Metallic
2,3,Li,lithium,145,167,182,134,no data,152
3,4,Be,beryllium,105,112,153 a,90,85,112
10,11,Na,sodium,180,190,227,154,no data,186
11,12,Mg,magnesium,150,145,173,130,127,160
12,13,Al,aluminium,125,118,184 a,118,111,143
18,19,K,potassium,220,243,275,196,no data,227
19,20,Ca,calcium,180,194,231 a,174,133,197
20,21,Sc,scandium,160,184,211 a,144,114,162 b
21,22,Ti,titanium,140,176,no data,136,108,147
22,23,V,vanadium,135,171,no data,125,106,134 b


In [6]:
# Second wikitable (index 1) found on the electronegativity data page
df_eln = dfs['elneg1']
# Bare last expression: display the table
df_eln

Unnamed: 0,Number,Symbol,Name,Electronegativity
0,1,H,hydrogen,2.300
1,2,He,helium,4.160
2,3,Li,lithium,0.912
3,4,Be,beryllium,1.576
4,5,B,boron,2.051
5,6,C,carbon,2.544
6,7,N,nitrogen,3.066
7,8,O,oxygen,3.610
8,9,F,fluorine,4.193
9,10,Ne,neon,4.787


In [19]:
# Radii of the elements of interest, restricted to the metallic and covalent columns
my_elements = ['Al', 'Fe', 'Ni', 'Cr', 'Co']
radius_columns = ['symbol', 'Metallic', 'Covalent (single bond)', 'Covalent (triple bond)']
is_selected = df_rad['symbol'].isin(my_elements)
df_rad.loc[is_selected, radius_columns]

Unnamed: 0,symbol,Metallic,Covalent (single bond),Covalent (triple bond)
12,Al,143,118,111
23,Cr,128 b,127,103
25,Fe,126 b,125,102
26,Co,125 b,126,96
27,Ni,124 b,121,101


In [18]:
# Electronegativity of the elements listed in my_elements
eln_selected = df_eln['Symbol'].isin(my_elements)
df_eln.loc[eln_selected, ['Symbol', 'Electronegativity']]

Unnamed: 0,Symbol,Electronegativity
12,Al,1.613
23,Cr,1.65
25,Fe,1.8
26,Co,1.84
27,Ni,1.88


In [20]:
# Names of the scraped tables that were stored as DataFrames
dfs.keys()

dict_keys(['radius0', 'elneg1'])

In [107]:
# Save dataframes: one CSV per table, written to data_ref as '<key>.csv'
# (row index omitted).
# Create the output directory first so a fresh checkout does not fail.
os.makedirs(data_ref, exist_ok=True)
for key, dfi in dfs.items():
    dfi.to_csv(os.path.join(data_ref, key+'.csv'), index=False)

In [109]:
# Save other data to pickles: one file per non-DataFrame table, written to
# data_ref as '<key>.pickle'.
# Create the output directory first so a fresh checkout does not fail.
os.makedirs(data_ref, exist_ok=True)
for key, dat in raw_data.items():
    with open(os.path.join(data_ref, key+'.pickle'), 'wb') as fo:
        pickle.dump(dat, fo)