In [47]:
import pymatgen
import distutils
import pymatgen.io.cif
import tempfile
import subprocess
def stidy(structure: pymatgen.core.Structure) -> [pymatgen.core.Structure]:
    '''
    Run STRUCTURE TIDY as implemented in the PLATON software package.
    PLATON must either be in the PATH or in ../bin.
    
    References:
        A. L. Spek (2009). Acta Cryst., D65, 148-155. 
        E. Parthé and L. M. Gelato (1984). Acta Cryst., A40, 169-183.
        L. M. Gelato and E. Parthé (1987). J. Appl. Cryst. 20, 139-143.
        S-Z. Hu and E. Parthé (2004). Chinese J. Struct. Chem. 23, 1150-1160.
    
    Args:
        structure (pymatgen.core.Structure): Pymatgen Structure object for the (probably) untidy structure
    Returns:
        list of pymatgen.core.Structure: STRUCTURE TIDY standardized structures,
        one per site listing that StidyParser finds in the PLATON output
    Raises:
        OSError: if the PLATON executable cannot be started (e.g. it is neither
            in the PATH nor at ../bin/platon)
    '''
    # Local import keeps this cell self-contained; shutil.which replaces
    # distutils.spawn.find_executable (distutils.spawn was never imported
    # explicitly, and distutils no longer exists on Python 3.12+).
    import shutil

    # Prefer a PLATON found in the PATH and fall back to the documented location.
    platon_executable = shutil.which('platon') or '../bin/platon'

    structure_cif = str(pymatgen.io.cif.CifWriter(structure))
    with tempfile.NamedTemporaryFile(suffix='.cif') as temp_file:
        temp_file.write(structure_cif.encode('utf-8'))
        # Flush so PLATON sees the complete CIF when it opens the file by name.
        temp_file.flush()
        # Use the resolved executable (the fallback path used to be ignored),
        # send the STIDY instruction on stdin and capture stdout+stderr together.
        platon_run = subprocess.run([platon_executable, '-o', temp_file.name],
                                    input=b'STIDY',
                                    stdout=subprocess.PIPE,
                                    stderr=subprocess.STDOUT)
    platon_output = platon_run.stdout.decode('utf-8')

    return StidyParser(platon_output).structure

In [67]:
import pymatgen
import re
class StidyParser(object):
    '''
    Parser for the text that PLATON prints when running STRUCTURE TIDY (STIDY).

    Every property re-scans ``self.output`` with a regular expression or a
    line walk each time it is accessed; nothing is cached.  Properties that
    return a single value raise AttributeError when the expected text is
    absent (``re.search`` returns None).

    Args:
        stidy_output (str): Complete text output of a PLATON STIDY run
    '''
    def __init__(self, stidy_output: str) -> None:
        self.output = stidy_output

    @property
    def formula(self) -> str:
        '''str: Reduced chemical formula'''
        regexp = re.compile(r'Structure Tidy Results for\s*(\w*)')
        match = regexp.search(self.output)
        return match.group(1)

    @property
    def space_group(self) -> str:
        '''str: Space group descriptor e.g. "P 1"'''
        # Everything after the formula word on the results line.
        regexp = re.compile(r'Structure Tidy Results for\s*\w*\s*(.*)')
        match = regexp.search(self.output)
        return str(match.group(1).strip())

    @property
    def axes_change(self) -> tuple:
        '''tuple: New axes in terms of originals e.g. ('a', 'b+c', 'b')'''
        regexp = re.compile(r'Axes changed to : (.*)')
        match = regexp.search(self.output)
        return tuple(match.group(1).strip().split(','))

    @property
    def pearson(self) -> str:
        '''str: Pearson code e.g. "aP"'''
        regexp = re.compile(r'Pearson code : (\w*)')
        match = regexp.search(self.output)
        return match.group(1)

    @property
    def cell(self) -> tuple:
        '''tuple: abc and angles of the standardized cell, as ((a, b, c), (angles...))'''
        regexp = re.compile(r'^Cell :.*$', re.MULTILINE)
        cell_line = regexp.search(self.output).group(0)
        # Numbers follow the last colon: the first three are lengths, the rest angles.
        values = cell_line.split(':')[-1].strip().split()
        abc = tuple(map(float, values[:3]))
        angles = tuple(map(float, values[3:]))
        return (abc, angles)

    @property
    def number_in_it(self) -> int:
        '''
        int: Integer printed after "Number in IT :"
             (presumably the space group number in the International Tables -- TODO confirm)
        '''
        regexp = re.compile(r'\s*Number in IT :\s*(\d+)')
        match = regexp.search(self.output)
        return int(match.group(1))

    @property
    def setting(self) -> list:
        '''
        list: 3-tuples of the comma-separated entries printed after "Setting"
              One entry per output structure
        '''
        regexp = re.compile(r'Setting\s*([-\w]*),([-\w]*),([-\w]*)')
        return regexp.findall(self.output)

    @property
    def origin(self) -> list:
        '''
        list: New origin in the old cell
        One entry per output structure
        '''
        regexp = re.compile(r'Origin\s*\(.*\)')
        origins = []
        for match in regexp.findall(self.output):
            # After stripping the outer parentheses/spaces the first two
            # whitespace-separated tokens are dropped ("Origin" and a detached
            # "(" in the output this was written for); the rest are coordinates.
            origins.append(tuple(map(float, match.strip('() ').split()[2:])))
        return origins

    @property
    def gamma(self) -> list:
        '''
        list: Gamma values for standardization minimization
              One entry per output structure
        '''
        regexp = re.compile(r'Gamma\s*=\s*.*')
        matches = regexp.findall(self.output)
        return [float(match.split()[-1]) for match in matches]

    @property
    def sites(self) -> list:
        '''
        list: Site data including:
                numbered species
                wyckoff site
                x y z
                species
                number
        One set of sites per output structure
        '''
        lines = self.output.split('\n')
        raw_site_collections = []
        for l, line in enumerate(lines):
            # A site table is announced by a line holding all three keywords.
            if {'Setting', 'Origin', 'Gamma'}.issubset(set(line.split())):
                raw_site_collection = []
                # Skip the line directly below the announcement; rows start after it.
                i = l + 2
                # Rows run until the first blank line.  The bounds check keeps a
                # table that reaches the end of the output from raising IndexError.
                while i < len(lines) and lines[i].strip():
                    raw_site = lines[i].strip().split()
                    raw_site[2:5] = list(map(float, raw_site[2:5])) # float xyz
                    raw_site_collection.append(raw_site)
                    i += 1
                raw_site_collections.append(raw_site_collection)
        return raw_site_collections

    @property
    def wyckoff(self) -> list:
        '''
        list: Wyckoff occupations
        One set of sites per output structure
        '''
        regexp = re.compile(r'Wyckoff sequence :\s*(.*)')
        matches = regexp.findall(self.output)
        # str.split() already removes surrounding whitespace from each token.
        return [tuple(match.split()) for match in matches]

    @property
    def summary_and_remarks(self) -> str:
        '''str: Unprocessed summary and remarks cut from output file'''
        # Greedy DOTALL match: from the "Summary and Remarks" line through the
        # last "-" that is immediately followed by a newline.
        regexp = re.compile(r'^Summary and Remarks.*-\n', re.MULTILINE|re.DOTALL)
        # Search the parser's own text (this used to read an undefined global).
        return regexp.search(self.output).group(0)

    @property
    def structure(self) -> 'list[pymatgen.core.Structure]':
        '''
        list of pymatgen.core.Structure: Structure objects built from the
        standardized cell and each parsed site table
        One entry per output structure
        '''
        abc, angles = self.cell
        # Same construction Lattice.from_lengths_and_angles performed, spelled
        # with the constructor that current pymatgen releases still provide.
        lattice = pymatgen.core.Lattice.from_parameters(abc[0], abc[1], abc[2],
                                                        angles[0], angles[1], angles[2])
        structures = []
        for raw_sites in self.sites:
            # Second-to-last column is the bare species symbol; columns 2-4 are x y z.
            species = [raw_site[-2] for raw_site in raw_sites]
            fractional_coords = [tuple(raw_site[2:5]) for raw_site in raw_sites]
            structures.append(pymatgen.core.Structure(lattice=lattice,
                                                      species=species,
                                                      coords=fractional_coords))
        return structures

In [68]:
import pymatgen
import spglib
def wyckoff_fingerprint(structure: pymatgen.core.Structure) -> str:
    '''
    Build a fingerprint string from spglib's symmetry analysis of a structure.

    Args:
        structure (pymatgen.core.Structure): Structure to analyse
    Returns:
        str: The parenthesised number taken from spglib.get_spacegroup, followed
             by one "_"-joined entry per item of the symmetry dataset's
             'wyckoffs' list, e.g. "164_d_d_d"
    '''
    SYMPREC = 1e-5
    ANGLE_TOLERANCE = -1

    # spglib cell tuple: (lattice matrix, fractional positions, atomic numbers)
    cell = (structure.lattice.matrix,
            structure.frac_coords,
            [site.specie.Z for site in structure.sites])

    # get_spacegroup returns text ending in "(<number>)"; keep the bare number.
    international = spglib.get_spacegroup(cell, symprec=SYMPREC)
    space_group_number = international.split()[-1].strip('()')

    # Derive the Hall number from the symmetry operations, then request the
    # dataset for exactly that Hall setting.
    operations = spglib.get_symmetry(cell, symprec=SYMPREC)
    hall_number = spglib.get_hall_number_from_symmetry(operations['rotations'],
                                                       operations['translations'],
                                                       symprec=SYMPREC)
    dataset = spglib.get_symmetry_dataset(cell, symprec=SYMPREC,
                                          angle_tolerance=ANGLE_TOLERANCE,
                                          hall_number=hall_number)

    wyckoff_part = '_'.join(str(letter) for letter in dataset['wyckoffs'])
    return '{}_{}'.format(space_group_number, wyckoff_part)

In [69]:
# Compare the Wyckoff fingerprint of the raw structure with that of a
# STRUCTURE TIDY standardized one.
structure = pymatgen.io.cif.CifParser('tmp.cif').get_structures()[0]
# wyckoff_fingerprint is the fingerprint function this notebook defines; the
# previously called name, stidy_fingerprint, is not defined anywhere in it.
raw_fingerprint = wyckoff_fingerprint(structure)
tidy_structures = stidy(structure)
# NOTE(review): this fingerprints the second tidy structure (index 1) -- confirm
# that index is the intended one.
tidy_fingerprint = wyckoff_fingerprint(tidy_structures[1])
print('raw:', raw_fingerprint)
print('tidy:', tidy_fingerprint)

raw: 164_d_d_d_d_d_d_d_d_d_d_d_d
tidy: 12_i_i_i_i_i_i_i_i_i_i_i_i


In [70]:
# Show the raw structure and the first tidy structure one after the other.
for shown_structure in (structure, tidy_structures[0]):
    display(shown_structure)

Structure Summary
Lattice
    abc : 3.1903153900000008 3.1903153900000003 39.758009
 angles : 90.0 90.0 120.00000000000001
 volume : 350.44712155778075
      A : -1.5951576950000006 -2.7628941738244595 -3.9070095306340396e-16
      B : -1.5951576949999993 2.7628941738244595 1.9535047653170198e-16
      C : 0.0 0.0 -39.758009
PeriodicSite: Mo (-1.5952, -0.9210, -23.5988) [0.6667, 0.3333, 0.5936]
PeriodicSite: Mo (-1.5952, -0.9210, -8.7197) [0.6667, 0.3333, 0.2193]
PeriodicSite: Mo (-1.5952, 0.9210, -31.0383) [0.3333, 0.6667, 0.7807]
PeriodicSite: Mo (-1.5952, 0.9210, -16.1592) [0.3333, 0.6667, 0.4064]
PeriodicSite: S (-1.5952, 0.9210, -22.0338) [0.3333, 0.6667, 0.5542]
PeriodicSite: S (-1.5952, 0.9210, -7.1549) [0.3333, 0.6667, 0.1800]
PeriodicSite: S (-1.5952, -0.9210, -29.4734) [0.6667, 0.3333, 0.7413]
PeriodicSite: S (-1.5952, -0.9210, -14.5944) [0.6667, 0.3333, 0.3671]
PeriodicSite: S (-1.5952, 0.9210, -25.1636) [0.3333, 0.6667, 0.6329]
PeriodicSite: S (-1.5952, 0.9210, -10.2846) [0

Structure Summary
Lattice
    abc : 3.1903 3.1903 39.758
 angles : 90.0 90.0 119.99999999999999
 volume : 350.44366114023046
      A : 3.1903 0.0 1.9534953416599006e-16
      B : -1.5951499999999994 2.7628808456934952 1.9534953416599006e-16
      C : 0.0 0.0 39.758
PeriodicSite: S (0.0000, 0.0000, 0.0000) [0.0000, 0.0000, 0.0000]
PeriodicSite: S (0.0000, 0.0000, 3.1297) [0.0000, 0.0000, 0.0787]
PeriodicSite: Mo (0.0000, 0.0000, 9.0044) [0.0000, 0.0000, 0.2265]
PeriodicSite: S (0.0000, 0.0000, 14.8790) [0.0000, 0.0000, 0.3742]
PeriodicSite: S (0.0000, 0.0000, 18.0088) [0.0000, 0.0000, 0.4530]
PeriodicSite: Mo (0.0000, 0.0000, 23.8834) [0.0000, 0.0000, 0.6007]
PeriodicSite: Mo (1.5952, 0.9210, 1.5649) [0.6667, 0.3333, 0.0394]
PeriodicSite: S (1.5952, 0.9210, 7.4395) [0.6667, 0.3333, 0.1871]
PeriodicSite: S (1.5952, 0.9210, 10.5693) [0.6667, 0.3333, 0.2658]
PeriodicSite: Mo (1.5952, 0.9210, 16.4439) [0.6667, 0.3333, 0.4136]
PeriodicSite: S (1.5952, 0.9210, 22.3186) [0.6667, 0.3333, 0.5614

In [60]:
# Run PLATON on tmp.cif from the shell, piping the STIDY instruction to its
# stdin, so the raw text output can be inspected below.
!echo STIDY | platon -o tmp.cif


Current Graphics Settings: Online Display  >> on ; DISPLAY   CODE = X11    
===(See HELP GRAPHICS)===: Meta File       >> off; META FILE  CODE = PS     

:: Data from: tmp.cif

:: Restricted CIF-File Format assumed (Automatic NOMOVE effective) 

:: Data Set MoS2                                         

::                             S.e.l.e.c.t.e.d  I.n.s.t.r.u.c.t.i.o.n.s
:: ************************  CALC for an exhaustive geometry calculation
:: *        PLATON        *  PLOT ADP for default labeled ORTEP-look-alike
:: *    A Multipurpose    *  LEPAGE to check for higher metrical symmetry
:: *   Crystallographic   *  CALC ADDSYM for a check for MISsed SYMmetry
:: *         Tool         *  CALC NONSYM for a non-cryst. symm. check
:: *          --          *  CALC SOLV to search for missed solvent areas
:: *(C) 1980-2018 A.L.Spek*  CALC SQUEEZE to handle disordered solvents
:: *          --          *  PLOT NEWMAN for NEWMAN-Projection Plots
:: *   version : 140518   *  LIST RADII fo