# Record consistency checks

This notebook provides consistency checks between:
- records stored at potentials.nist.gov and in the potentials-library github repository
- records of different styles that share content

In [119]:
import uuid
import datetime
import shutil
from pathlib import Path

import potentials

import numpy as np
import pandas as pd

import iprPy

# Jupyter display libraries
# NOTE: IPython.display is the public home of display/HTML; importing them
# from IPython.core.display is deprecated.
from IPython.display import display, HTML

In [2]:
# Select which machine's settings to use. The credentials cell reads this
# value and raises ValueError for any name other than 'laptop' or 'desktop'.
#machine = 'laptop'
machine = 'desktop'

In [3]:
# Resolve the credentials file for the selected machine; unknown machine
# names are rejected before any file access is attempted.
if machine not in ('laptop', 'desktop'):
    raise ValueError(f'passwordfile not set for machine {machine}')
passwordfile = Path(
    'C:/Users/lmh1/Documents/potentials_nist_gov/password.txt'
    if machine == 'laptop'
    else 'E:/potentials_nist_gov/password.txt'
)

# The file must contain exactly two whitespace-separated tokens:
# the username followed by the password.
username, password = passwordfile.read_text().strip().split()

# Build three database handles sharing the same credentials:
#   db        - default settings (combined local + remote view)
#   db_local  - local records only
#   db_remote - remote records only
db = potentials.Database(username=username, password=password)
db_local = potentials.Database(
    username=username,
    password=password,
    local=True,
    remote=False,
)
db_remote = potentials.Database(
    username=username,
    password=password,
    local=False,
    remote=True,
)

In [4]:
# Workspace handed to the upload/assign calls later in the notebook.
# Swap to the commented line to pass workspace=None instead
# (presumably leaves uploaded records unassigned - confirm).
#workspace = None
workspace = db.cdcs.global_workspace

## 1. Check Citations

In [107]:
# Load the citation records into each single-source database object.
# verbose=True prints how many citations were loaded from each source.
db_local.load_citations(verbose=True)
db_remote.load_citations(verbose=True)

Loaded 355 local citations
Loaded 355 remote citations


---

Check for missing records

In [108]:
print('missing from local:')
# A remote citation counts as present locally only when BOTH its doi and its
# note value appear among the local citations; show every row failing that.
db_remote.citations_df.loc[
    ~(
        db_remote.citations_df.doi.isin(db_local.citations_df.doi.tolist())
        & db_remote.citations_df.note.isin(db_local.citations_df.note.tolist())
    )
]

missing from local:


Unnamed: 0,ENTRYTYPE,ID,abstract,address,author,booktitle,day,doi,journal,month,note,number,numpages,pages,publisher,title,url,volume,year


In [106]:
# Update remote to match local: fetch a citation from the remote database by
# DOI and delete it from the remote side only (remote=True, local=False).
# Both statements are kept commented out; uncomment and set the DOI to run.
#citation = db_remote.get_citation('10.25950/962B4967', verbose=True)
#db_local.delete_citation(citation, verbose=True, remote=True, local=False)

Citation retrieved from loaded citations
record 10.25950_962b4967 (6049112f26ed1e003acff721) has been deleted.


In [109]:
print('missing from remote:')
# A local citation counts as present remotely only when BOTH its doi and its
# note value appear among the remote citations; show every row failing that.
db_local.citations_df.loc[
    ~(
        db_local.citations_df.doi.isin(db_remote.citations_df.doi.tolist())
        & db_local.citations_df.note.isin(db_remote.citations_df.note.tolist())
    )
]

missing from remote:


Unnamed: 0,ENTRYTYPE,ID,abstract,address,author,booktitle,day,doi,journal,month,note,number,numpages,pages,publisher,title,url,volume,year


---

Compare record contents

In [110]:
# Reload both citation sets, explicitly restricting each to its own source.
db_local.load_citations(verbose=True, remote=False, local=True)
db_remote.load_citations(verbose=True, remote=True, local=False)

# Walk the local citations and stop at the FIRST one whose bibtex differs from
# its remote counterpart. After the break, `local_cite` and `remote_cite` hold
# that differing pair, which is what the follow-up cells operate on.
for local_cite in db_local.citations:

    # Match on doi, falling back to the note field if the doi comparison
    # raises (presumably for citations without a doi - confirm).
    # `except Exception` rather than a bare except, so KeyboardInterrupt and
    # SystemExit are not swallowed.
    try:
        match = db_remote.citations_df.doi == local_cite.doi
    except Exception:
        match = db_remote.citations_df.note == local_cite.note

    # Count the matches once, using the vectorised Series sum
    num_matches = int(match.sum())

    if num_matches > 1:
        print('multiple matches for', local_cite.doifname)
    elif num_matches == 0:
        print('no matches for', local_cite.doifname)
    else:
        remote_cite = db_remote.citations[match][0]
        if local_cite.bibtex != remote_cite.bibtex:
            print('different data for', local_cite.doifname)
            print()
            print('local:')
            print(local_cite.bibtex)
            print('remote:')
            print(remote_cite.bibtex)
            break
else:
    # Reached only when the loop finished without a break, i.e. no differing
    # pair was found. Say so explicitly, because `local_cite`/`remote_cite`
    # then do NOT refer to a mismatching pair.
    print('no differing citations found')


Loaded 355 local citations
Loaded 355 remote citations


In [100]:
# Show where the local and remote bibtex strings first diverge.
# First line printed: whether the two strings have the same length.
# Second line (only when the strings differ): their common leading text.
local_bibtex = local_cite.bibtex
remote_bibtex = remote_cite.bibtex
print(len(local_bibtex) == len(remote_bibtex))

# Count the shared leading characters. zip() stops at the shorter string, so
# a shorter remote bibtex can no longer raise IndexError.
num_same = 0
for local_char, remote_char in zip(local_bibtex, remote_bibtex):
    if local_char != remote_char:
        break
    num_same += 1

# Print the shared prefix whenever the strings are not identical, including
# the case where one string is a strict prefix of the other.
if num_same < max(len(local_bibtex), len(remote_bibtex)):
    print(local_bibtex[:num_same])

True
@article{Ko_2011,
 abstract = {Second nearest-neighbor modified embedded-atom method (MEAM) interatomic potentials for the Al


In [59]:
# Update local to remote
citation = db_local.get_citation(local_cite.doi, verbose=True)
db_remote.upload_citation(citation, workspace=workspace, verbose=True)

record 10.1016_j.actamat.2008.09.031 (6065f19a26ed1e003628efe6) has been updated.
record 6065f19a26ed1e003628efe6 assigned to workspace 5fb55e4826ed1e0015e846a9


In [103]:
# Update remote to local
citation = db_remote.get_citation(remote_cite.doi, verbose=True)
db_local.save_citations(citation, overwrite=True, verbose=True)

Citation retrieved from loaded citations
1 citations saved to localpath
 - 1 new citations added


In [19]:
# Spot check: print the bibtex of the loaded local citation at index 1.
print(db_local.citations[1].bibtex)

@article{Zhou_2015,
 abstract = {Carbon is the most widely studied material today because it exhibits special properties not seen in any other materials when in nano dimensions such as nanotube and graphene. Reduction of material defects created during synthesis has become critical to realize the full potential of carbon structures. Molecular dynamics (MD) simulations, in principle, allow defect formation mechanisms to be studied with high fidelity, and can, therefore, help guide experiments for defect reduction. Such MD simulations must satisfy a set of stringent requirements. First, they must employ an interatomic potential formalism that is transferable to a variety of carbon structures. Second, the potential needs to be appropriately parameterized to capture the property trends of important carbon structures, in particular, diamond, graphite, graphene, and nanotubes. Most importantly, the potential must predict the crystalline growth of the correct phases during direct MD simulatio

---
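The copy methods used for rebuilding the CDCS entries replace underscores in record titles with spaces. The cell below changes those spaces back to underscores by deleting each affected Citation record and re-uploading its content under the corrected title.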

In [None]:
# Fetch every remote Citation record and repair titles that contain spaces.
rawcites = db_remote.cdcs.query(template='Citation')
for i in rawcites.index:
    rawcite = rawcites.loc[i]

    # Titles without spaces are already in the expected form
    if ' ' not in rawcite.title:
        continue

    title = rawcite.title.replace(' ', '_')
    content = rawcite.xml_content

    # Replace the record: remove the old entry, then upload the same content
    # under the underscore title.
    # NOTE(review): the delete happens before the upload, so a failed upload
    # would leave the record missing remotely - confirm this is acceptable.
    db_remote.cdcs.delete_record(rawcite, verbose=True)
    db_remote.cdcs.upload_record(
        'Citation',
        content=content,
        title=title,
        workspace=workspace,
        verbose=True,
    )

- - -

## LAMMPS potentials

In [113]:
# Load the LAMMPS potential records into db_local with remote lookups
# disabled (remote=False).
db_local.load_lammps_potentials(remote=False)


In [120]:
# Open the iprPy database registered under the name 'iprhub'.
iprhub = iprPy.load_database('iprhub')

In [121]:
# Build reference records in iprhub, limited to 'potential_LAMMPS'
# (presumably a record style name - confirm against iprPy's build_refs docs).
iprhub.build_refs(include='potential_LAMMPS')

--- 

## Results records

In [122]:
# Assign the remote records of template 'PotentialProperties' to `workspace`.
# verbose=True prints one line per assigned record, so the output is long.
db_remote.cdcs.assign_records(workspace, template='PotentialProperties', verbose=True)

record 60661c6926ed1e003628f077 assigned to workspace 5fb55e4826ed1e0015e846a9
record 5fdd183026ed1e0035cff3ce assigned to workspace 5fb55e4826ed1e0015e846a9
record 5fdd182f26ed1e003bcff3bf assigned to workspace 5fb55e4826ed1e0015e846a9
record 5fdd182e26ed1e0036cff3ce assigned to workspace 5fb55e4826ed1e0015e846a9
record 5fdd182d26ed1e003dcff41e assigned to workspace 5fb55e4826ed1e0015e846a9
record 5ffca92226ed1e002acff4cb assigned to workspace 5fb55e4826ed1e0015e846a9
record 5fdd182c26ed1e0039cff41a assigned to workspace 5fb55e4826ed1e0015e846a9
record 5ffca92026ed1e0037cff518 assigned to workspace 5fb55e4826ed1e0015e846a9
record 5ffca92026ed1e003ccff4e3 assigned to workspace 5fb55e4826ed1e0015e846a9
record 5ffca91b26ed1e003ccff4df assigned to workspace 5fb55e4826ed1e0015e846a9
record 5fdd182b26ed1e0035cff3ca assigned to workspace 5fb55e4826ed1e0015e846a9
record 5ffca91926ed1e0035cff58d assigned to workspace 5fb55e4826ed1e0015e846a9
record 5fdd182a26ed1e003bcff3bb assigned to workspac

record 5fdd17e226ed1e003ccff3f6 assigned to workspace 5fb55e4826ed1e0015e846a9
record 5fdd17e126ed1e003dcff3f6 assigned to workspace 5fb55e4826ed1e0015e846a9
record 5fdd17e026ed1e003acff3f2 assigned to workspace 5fb55e4826ed1e0015e846a9
record 5ffca82826ed1e0038cff432 assigned to workspace 5fb55e4826ed1e0015e846a9
record 5fdd17df26ed1e0039cff3fa assigned to workspace 5fb55e4826ed1e0015e846a9
record 5ffca82426ed1e0035cff581 assigned to workspace 5fb55e4826ed1e0015e846a9
record 5fdd17de26ed1e002acff3f3 assigned to workspace 5fb55e4826ed1e0015e846a9
record 5ffca82126ed1e0035cff57d assigned to workspace 5fb55e4826ed1e0015e846a9
record 5ffca82026ed1e0036cff55d assigned to workspace 5fb55e4826ed1e0015e846a9
record 5fdd17dd26ed1e003ccff3f2 assigned to workspace 5fb55e4826ed1e0015e846a9
record 5ffca81826ed1e0035cff579 assigned to workspace 5fb55e4826ed1e0015e846a9
record 5fdd17dc26ed1e003dcff3f2 assigned to workspace 5fb55e4826ed1e0015e846a9
record 5fdd17db26ed1e003acff3ee assigned to workspac

record 5fdd179926ed1e003dcff3be assigned to workspace 5fb55e4826ed1e0015e846a9
record 5ffca73426ed1e003acff624 assigned to workspace 5fb55e4826ed1e0015e846a9
record 5fdd179826ed1e003acff3ba assigned to workspace 5fb55e4826ed1e0015e846a9
record 5fdd179726ed1e0039cff3c2 assigned to workspace 5fb55e4826ed1e0015e846a9
record 5fdd179626ed1e002acff3bb assigned to workspace 5fb55e4826ed1e0015e846a9
record 5ffca72926ed1e0037cff500 assigned to workspace 5fb55e4826ed1e0015e846a9
record 5fdd179526ed1e003ccff3ba assigned to workspace 5fb55e4826ed1e0015e846a9
record 5ffca72526ed1e0037cff4fc assigned to workspace 5fb55e4826ed1e0015e846a9
record 5fdd179426ed1e003dcff3ba assigned to workspace 5fb55e4826ed1e0015e846a9
record 5fdd179226ed1e003acff3b6 assigned to workspace 5fb55e4826ed1e0015e846a9
record 5fdd179126ed1e0039cff3be assigned to workspace 5fb55e4826ed1e0015e846a9
record 5ffca72026ed1e0039cff570 assigned to workspace 5fb55e4826ed1e0015e846a9
record 5ffca71b26ed1e003acff620 assigned to workspac

record 603eac2726ed1e0039cff5a8 assigned to workspace 5fb55e4826ed1e0015e846a9
record 603eac2626ed1e0038cff472 assigned to workspace 5fb55e4826ed1e0015e846a9
record 603eac2526ed1e0039cff5a4 assigned to workspace 5fb55e4826ed1e0015e846a9
record 603eac2426ed1e0038cff46e assigned to workspace 5fb55e4826ed1e0015e846a9
record 603eac2326ed1e002acff581 assigned to workspace 5fb55e4826ed1e0015e846a9
record 603eac2226ed1e0038cff46a assigned to workspace 5fb55e4826ed1e0015e846a9
record 603eac2126ed1e002acff57d assigned to workspace 5fb55e4826ed1e0015e846a9
record 603eac2026ed1e0038cff466 assigned to workspace 5fb55e4826ed1e0015e846a9
record 603eac1e26ed1e003bcff46e assigned to workspace 5fb55e4826ed1e0015e846a9
record 603eac1d26ed1e003ccff525 assigned to workspace 5fb55e4826ed1e0015e846a9
record 603eac1c26ed1e002acff579 assigned to workspace 5fb55e4826ed1e0015e846a9
record 603eac1b26ed1e0036cff599 assigned to workspace 5fb55e4826ed1e0015e846a9
record 603eac1a26ed1e003dcff6c4 assigned to workspac

record 5fb5609c26ed1e003b171425 assigned to workspace 5fb55e4826ed1e0015e846a9
record 5fb5605526ed1e00351713f3 assigned to workspace 5fb55e4826ed1e0015e846a9
record 5fb5600a26ed1e003a1713a4 assigned to workspace 5fb55e4826ed1e0015e846a9
record 5fb5603d26ed1e003b1713d0 assigned to workspace 5fb55e4826ed1e0015e846a9
record 5fb5607026ed1e0035171413 assigned to workspace 5fb55e4826ed1e0015e846a9
record 5fb5607126ed1e003c1713e9 assigned to workspace 5fb55e4826ed1e0015e846a9
record 5fb5602d26ed1e003d1713d3 assigned to workspace 5fb55e4826ed1e0015e846a9
record 5fb5602a26ed1e003c1713b2 assigned to workspace 5fb55e4826ed1e0015e846a9
record 5fb5609a26ed1e003d171435 assigned to workspace 5fb55e4826ed1e0015e846a9
record 5fb560b426ed1e003d17144b assigned to workspace 5fb55e4826ed1e0015e846a9
record 5fb560e326ed1e0035171479 assigned to workspace 5fb55e4826ed1e0015e846a9
record 5fb5608a26ed1e003c171404 assigned to workspace 5fb55e4826ed1e0015e846a9
record 5fb5611626ed1e003d17149d assigned to workspac

record 5fb5606d26ed1e003c1713e5 assigned to workspace 5fb55e4826ed1e0015e846a9
record 5fb5606926ed1e003517140f assigned to workspace 5fb55e4826ed1e0015e846a9
record 5fb560e426ed1e003d171473 assigned to workspace 5fb55e4826ed1e0015e846a9
record 5fb5613026ed1e00351714bb assigned to workspace 5fb55e4826ed1e0015e846a9
record 5fb55fff26ed1e003c17138f assigned to workspace 5fb55e4826ed1e0015e846a9
record 5fb5607326ed1e003a171401 assigned to workspace 5fb55e4826ed1e0015e846a9
record 5fb560ed26ed1e003c17146a assigned to workspace 5fb55e4826ed1e0015e846a9
record 5fb5601526ed1e00351713af assigned to workspace 5fb55e4826ed1e0015e846a9
record 5fb5600426ed1e003b171393 assigned to workspace 5fb55e4826ed1e0015e846a9
record 5fb5610b26ed1e003b17149d assigned to workspace 5fb55e4826ed1e0015e846a9
record 5fb560cf26ed1e0035171468 assigned to workspace 5fb55e4826ed1e0015e846a9
record 5fb5603826ed1e003b1713cb assigned to workspace 5fb55e4826ed1e0015e846a9
record 5fb560e126ed1e003b17146a assigned to workspac

record 5fb5601226ed1e003d1713ba assigned to workspace 5fb55e4826ed1e0015e846a9
record 5fb5602626ed1e003c1713ae assigned to workspace 5fb55e4826ed1e0015e846a9
record 5fb5601826ed1e003d1713be assigned to workspace 5fb55e4826ed1e0015e846a9
record 5fb5607626ed1e003d171410 assigned to workspace 5fb55e4826ed1e0015e846a9
record 5fb5605126ed1e003d1713f2 assigned to workspace 5fb55e4826ed1e0015e846a9
record 5fb560fd26ed1e003b171484 assigned to workspace 5fb55e4826ed1e0015e846a9
record 5fb5614726ed1e003c1714b1 assigned to workspace 5fb55e4826ed1e0015e846a9
record 5fb5604d26ed1e003d1713ee assigned to workspace 5fb55e4826ed1e0015e846a9
record 5fb5601a26ed1e00351713b3 assigned to workspace 5fb55e4826ed1e0015e846a9
record 5fb5613426ed1e003b1714c6 assigned to workspace 5fb55e4826ed1e0015e846a9
record 5fb5604426ed1e003b1713d4 assigned to workspace 5fb55e4826ed1e0015e846a9
record 5fb5609526ed1e003a17142a assigned to workspace 5fb55e4826ed1e0015e846a9
record 5fb560f926ed1e003517148f assigned to workspac

record 5fdd185726ed1e0036cff3ea assigned to workspace 5fb55e4826ed1e0015e846a9
record 5fdd185626ed1e003ccff436 assigned to workspace 5fb55e4826ed1e0015e846a9
record 5fdd185526ed1e003dcff43a assigned to workspace 5fb55e4826ed1e0015e846a9
record 5fdd185426ed1e0037cff3db assigned to workspace 5fb55e4826ed1e0015e846a9
record 5fdd185226ed1e0039cff432 assigned to workspace 5fb55e4826ed1e0015e846a9
record 5fdd185126ed1e0036cff3e6 assigned to workspace 5fb55e4826ed1e0015e846a9
record 5ffca97926ed1e003dcff4ed assigned to workspace 5fb55e4826ed1e0015e846a9
record 5fdd185026ed1e003ccff432 assigned to workspace 5fb55e4826ed1e0015e846a9
record 5fdd184f26ed1e003dcff436 assigned to workspace 5fb55e4826ed1e0015e846a9
record 5fdd184e26ed1e0037cff3d7 assigned to workspace 5fb55e4826ed1e0015e846a9
record 5fdd184d26ed1e0039cff42e assigned to workspace 5fb55e4826ed1e0015e846a9
record 5fdd184c26ed1e0036cff3e2 assigned to workspace 5fb55e4826ed1e0015e846a9
record 5fdd184b26ed1e003ccff42e assigned to workspac

In [124]:
# Display the names of the databases defined in the iprPy settings.
# list_databases is read as an attribute here, not called.
iprPy.Settings().list_databases

['master',
 'iprhub',
 'master_local',
 'library_local',
 'potentials',
 'wsl',
 'test']