# CDCS Public Data Exploration

This notebook provides an example of how anonymous users can query records and templates in a CDCS instance without logging in, provided that read permissions have been set to allow it.

In [1]:
import cdcs
from cdcs import CDCS

# Record which cdcs release produced the outputs saved in this notebook.
print(f'Notebook executed for cdcs version {cdcs.__version__}')

Notebook executed for cdcs version 0.2.0


Create client managers as an anonymous user (an empty username).

In [2]:
# An empty username requests an anonymous (not logged in) session.
curator_v2 = CDCS('https://potentials.nist.gov/', username='')
# cdcsversion is a tuple and is distinct from the cdcs package version printed
# above; presumably it is the CDCS release running on the host -- TODO confirm.
print(curator_v2.cdcsversion)

(2, 15, 0)


In [3]:
# Second anonymous client, pointed at the test host.
# NOTE(review): verify=False presumably turns off TLS certificate verification
# (as with requests) -- tolerable for a test host only; confirm, and do not
# copy this setting when connecting to a production host.
curator_v3 = CDCS('https://test-potentials.nist.gov/', username='', verify=False)
print(curator_v3.cdcsversion)

(3, 0, 1)


In [4]:
# Every cell below goes through `curator`; rebind it to curator_v2 here to try
# the same calls against the other instance.
curator = curator_v3

See the available record templates

In [5]:
# Titles of the record templates available to this client.
curator.template_titles

['Action',
 'calculation_bond_angle_scan',
 'calculation_diatom_scan',
 'stacking_fault',
 'Request',
 'PotentialProperties',
 'potential_LAMMPS_KIM',
 'potential_LAMMPS',
 'Potential',
 'free_surface',
 'FAQ',
 'crystal_prototype',
 'Citation',
 'calculation_isolated_atom',
 'relaxed_crystal']

Query and explore the data

__Notes:__ The current design downloads all matching records by iterating through all pages until `next=None`. Arguments are still needed for:

- limit: maximum number of items to return
- page: allows access to items beyond the limit
- nitems: number of items retrieved per download (available in the REST call?)

In [6]:
# Keyword search limited to records of the 'Potential' template.
# The result is not assigned, so it is only shown as the cell output.
curator.query(template='Potential', keyword='Mishin')

100%|██████████████████████████████████████████████████████████████████████████████████| 34/34 [00:00<00:00, 58.94it/s]


Unnamed: 0,id,template,workspace,user_id,title,xml_content,creation_date,last_modification_date,last_change_date,template_title
0,3264,9,1,5,potential.2017--Purja-Pun-G-P-Mishin-Y--Si,"<?xml version=""1.0"" encoding=""utf-8""?>\n<inter...",2021-08-26 13:13:11.941000+00:00,2021-11-30 16:53:26.050000+00:00,2021-11-30 16:53:26.057000+00:00,Potential
1,3311,9,1,5,potential.2016--Samolyuk-G-D-Beland-L-K-Stocks...,"<?xml version=""1.0"" encoding=""utf-8""?>\n<inter...",2021-08-26 13:15:11.663000+00:00,2021-11-30 16:53:10.659000+00:00,2021-11-30 16:53:10.665000+00:00,Potential
2,2806,9,1,5,potential.2012--Purja-Pun-G-P-Mishin-Y--Co,"<?xml version=""1.0"" encoding=""utf-8""?>\n<inter...",2021-08-26 13:10:28.109000+00:00,2021-11-30 16:48:47.958000+00:00,2021-11-30 16:48:47.964000+00:00,Potential
3,3146,9,1,5,potential.2010--Apostol-F-Mishin-Y--Al-H,"<?xml version=""1.0"" encoding=""utf-8""?>\n<inter...",2021-08-26 13:15:47.041000+00:00,2021-11-30 16:48:21.673000+00:00,2021-11-30 16:48:21.680000+00:00,Potential
4,3247,9,1,5,potential.2004--Mishin-Y--Ni-Al,"<?xml version=""1.0"" encoding=""utf-8""?>\n<inter...",2021-08-26 13:11:37.076000+00:00,2021-11-30 16:47:16.540000+00:00,2021-11-30 16:47:16.548000+00:00,Potential
5,2981,9,1,5,potential.2002--Mishin-Y-Mehl-M-J-Papaconstant...,"<?xml version=""1.0"" encoding=""utf-8""?>\n<inter...",2021-08-26 13:15:34.369000+00:00,2021-11-30 16:46:57.855000+00:00,2021-11-30 16:46:57.862000+00:00,Potential
6,2998,9,1,5,potential.2001--Mishin-Y-Mehl-M-J-Papaconstant...,"<?xml version=""1.0"" encoding=""utf-8""?>\n<inter...",2021-08-26 13:13:17.563000+00:00,2021-11-30 16:46:54.421000+00:00,2021-11-30 16:46:54.428000+00:00,Potential
7,3318,9,1,5,potential.2001--Mishin-Y-Mehl-M-J-Papaconstant...,"<?xml version=""1.0"" encoding=""utf-8""?>\n<inter...",2021-08-26 13:14:25.665000+00:00,2021-11-30 16:46:52.644000+00:00,2021-11-30 16:46:52.676000+00:00,Potential
8,3345,9,1,5,potential.2018--Howells-C-A-Mishin-Y--Cr-Ni,"<?xml version=""1.0"" encoding=""utf-8""?>\n<inter...",2021-08-26 13:15:50.587000+00:00,2021-08-26 13:15:50.587000+00:00,2021-08-26 13:15:50.920000+00:00,Potential
9,2894,9,1,5,potential.2003--Zope-R-R-Mishin-Y--Al,"<?xml version=""1.0"" encoding=""utf-8""?>\n<inter...",2021-08-26 13:15:45.095000+00:00,2021-08-26 13:15:45.095000+00:00,2021-08-26 13:15:45.435000+00:00,Potential


The same function handles both keyword and query calls; which REST URL is used is handled internally.

In [7]:
# Field-based search: the mongoquery dict maps a dotted field path to the value
# to match (presumably Mongo-style syntax over the record content -- confirm
# against the cdcs documentation).
records = curator.query(mongoquery={'interatomic-potential.element':'Ga'})
# A bare expression on the last line displays the result.
records

Unnamed: 0,id,template,workspace,user_id,title,xml_content,creation_date,last_modification_date,last_change_date,template_title
0,3335,9,1,5,potential.2009--Do-E-C-Shin-Y-H-Lee-B-J--Ga-N,"<?xml version=""1.0"" encoding=""utf-8""?>\n<inter...",2021-08-26 13:14:20.232000+00:00,2022-01-14 20:53:59.021000+00:00,2022-01-14 20:53:59.029000+00:00,Potential
1,3325,9,1,5,potential.2009--Do-E-C-Shin-Y-H-Lee-B-J--Ga-In-N,"<?xml version=""1.0"" encoding=""utf-8""?>\n<inter...",2021-08-26 13:13:03.653000+00:00,2022-01-14 20:53:53.393000+00:00,2022-01-14 20:53:53.400000+00:00,Potential
2,2733,9,1,5,potential.2009--Do-E-C-Shin-Y-H-Lee-B-J--Ga-In,"<?xml version=""1.0"" encoding=""utf-8""?>\n<inter...",2021-08-26 13:11:05.366000+00:00,2022-01-14 20:53:44.135000+00:00,2022-01-14 20:53:44.276000+00:00,Potential
3,3333,9,1,5,potential.2017--Zhou-X-W-Jones-R-E-Chu-K--In-Ga-N,"<?xml version=""1.0"" encoding=""utf-8""?>\n<inter...",2021-08-26 13:14:50.712000+00:00,2021-11-30 16:53:29.465000+00:00,2021-11-30 16:53:29.471000+00:00,Potential
4,3021,9,1,5,potential.2015--Elliott-R-S-Akerson-A--Ga,"<?xml version=""1.0"" encoding=""utf-8""?>\n<inter...",2021-08-26 13:12:22.444000+00:00,2021-11-30 16:50:20.873000+00:00,2021-11-30 16:50:20.879000+00:00,Potential
5,2742,9,1,5,potential.2003--Nord-J-Albe-K-Erhart-P-Nordlun...,"<?xml version=""1.0"" encoding=""utf-8""?>\n<inter...",2021-08-26 13:15:18.574000+00:00,2021-08-26 13:15:18.574000+00:00,2021-08-26 13:15:18.896000+00:00,Potential
6,2801,9,1,5,potential.2002--Albe-K-Nordlund-K-Nord-J-Kuron...,"<?xml version=""1.0"" encoding=""utf-8""?>\n<inter...",2021-08-26 13:15:08.579000+00:00,2021-08-26 13:15:08.579000+00:00,2021-08-26 13:15:08.904000+00:00,Potential
7,2941,9,1,5,potential.2006--Murdick-D-A-Zhou-X-W-Wadley-H-...,"<?xml version=""1.0"" encoding=""utf-8""?>\n<inter...",2021-08-26 13:14:42.650000+00:00,2021-08-26 13:14:42.650000+00:00,2021-08-26 13:14:42.982000+00:00,Potential
8,3144,9,1,5,potential.2006--Bere-A-Serra-A--Ga-N,"<?xml version=""1.0"" encoding=""utf-8""?>\n<inter...",2021-08-26 13:14:01.723000+00:00,2021-08-26 13:14:01.723000+00:00,2021-08-26 13:14:02.041000+00:00,Potential


Note that the fetched content is returned as a pandas.DataFrame. This allows for the content to be easily explored and further parsed.

In [8]:
# Select the title column by key rather than by attribute access.
records['title']

0        potential.2009--Do-E-C-Shin-Y-H-Lee-B-J--Ga-N
1     potential.2009--Do-E-C-Shin-Y-H-Lee-B-J--Ga-In-N
2       potential.2009--Do-E-C-Shin-Y-H-Lee-B-J--Ga-In
3    potential.2017--Zhou-X-W-Jones-R-E-Chu-K--In-Ga-N
4            potential.2015--Elliott-R-S-Akerson-A--Ga
5    potential.2003--Nord-J-Albe-K-Erhart-P-Nordlun...
6    potential.2002--Albe-K-Nordlund-K-Nord-J-Kuron...
7    potential.2006--Murdick-D-A-Zhou-X-W-Wadley-H-...
8                 potential.2006--Bere-A-Serra-A--Ga-N
Name: title, dtype: object

In [9]:
# Full XML text of the record whose index label is 3.
records.loc[3, 'xml_content']

'<?xml version="1.0" encoding="utf-8"?>\n<interatomic-potential><key>f4c3e281-bafc-4d23-9571-a796d8b1053f</key><id>2017--Zhou-X-W-Jones-R-E-Chu-K--In-Ga-N</id><record-version>2019-09-05</record-version><description><citation><document-type>journal</document-type><title>Polymorphic improvement of Stillinger-Weber potential for InGaN</title><author><given-name>X.W.</given-name><surname>Zhou</surname></author><author><given-name>R.E.</given-name><surname>Jones</surname></author><author><given-name>K.</given-name><surname>Chu</surname></author><publication-name>Journal of Applied Physics</publication-name><publication-date><year>2017</year></publication-date><volume>122</volume><issue>23</issue><abstract>A Stillinger-Weber potential is computationally very efficient for molecular dynamics simulations.  Despite its simple mathematical form, the Stillinger-Weber potential can be easily parameterized to ensure that crystal structures with tetrahedral bond angles (e.g., diamond-cubic, zinc-ble

In [10]:
# Oldest modification first (ascending is the default); `records` itself is
# left unchanged because sort_values returns a new frame.
records.sort_values(by='last_modification_date')

Unnamed: 0,id,template,workspace,user_id,title,xml_content,creation_date,last_modification_date,last_change_date,template_title
8,3144,9,1,5,potential.2006--Bere-A-Serra-A--Ga-N,"<?xml version=""1.0"" encoding=""utf-8""?>\n<inter...",2021-08-26 13:14:01.723000+00:00,2021-08-26 13:14:01.723000+00:00,2021-08-26 13:14:02.041000+00:00,Potential
7,2941,9,1,5,potential.2006--Murdick-D-A-Zhou-X-W-Wadley-H-...,"<?xml version=""1.0"" encoding=""utf-8""?>\n<inter...",2021-08-26 13:14:42.650000+00:00,2021-08-26 13:14:42.650000+00:00,2021-08-26 13:14:42.982000+00:00,Potential
6,2801,9,1,5,potential.2002--Albe-K-Nordlund-K-Nord-J-Kuron...,"<?xml version=""1.0"" encoding=""utf-8""?>\n<inter...",2021-08-26 13:15:08.579000+00:00,2021-08-26 13:15:08.579000+00:00,2021-08-26 13:15:08.904000+00:00,Potential
5,2742,9,1,5,potential.2003--Nord-J-Albe-K-Erhart-P-Nordlun...,"<?xml version=""1.0"" encoding=""utf-8""?>\n<inter...",2021-08-26 13:15:18.574000+00:00,2021-08-26 13:15:18.574000+00:00,2021-08-26 13:15:18.896000+00:00,Potential
4,3021,9,1,5,potential.2015--Elliott-R-S-Akerson-A--Ga,"<?xml version=""1.0"" encoding=""utf-8""?>\n<inter...",2021-08-26 13:12:22.444000+00:00,2021-11-30 16:50:20.873000+00:00,2021-11-30 16:50:20.879000+00:00,Potential
3,3333,9,1,5,potential.2017--Zhou-X-W-Jones-R-E-Chu-K--In-Ga-N,"<?xml version=""1.0"" encoding=""utf-8""?>\n<inter...",2021-08-26 13:14:50.712000+00:00,2021-11-30 16:53:29.465000+00:00,2021-11-30 16:53:29.471000+00:00,Potential
2,2733,9,1,5,potential.2009--Do-E-C-Shin-Y-H-Lee-B-J--Ga-In,"<?xml version=""1.0"" encoding=""utf-8""?>\n<inter...",2021-08-26 13:11:05.366000+00:00,2022-01-14 20:53:44.135000+00:00,2022-01-14 20:53:44.276000+00:00,Potential
1,3325,9,1,5,potential.2009--Do-E-C-Shin-Y-H-Lee-B-J--Ga-In-N,"<?xml version=""1.0"" encoding=""utf-8""?>\n<inter...",2021-08-26 13:13:03.653000+00:00,2022-01-14 20:53:53.393000+00:00,2022-01-14 20:53:53.400000+00:00,Potential
0,3335,9,1,5,potential.2009--Do-E-C-Shin-Y-H-Lee-B-J--Ga-N,"<?xml version=""1.0"" encoding=""utf-8""?>\n<inter...",2021-08-26 13:14:20.232000+00:00,2022-01-14 20:53:59.021000+00:00,2022-01-14 20:53:59.029000+00:00,Potential
