# Wyckoff from CSD Entry

12th May - A notebook for converting a CSD entry to a .cif file and then to an AFLOW label.

Read CSD entry and write to .cif

In [57]:
from ccdc import io, utilities

# Open the local CSD database and pull the crystal for a single refcode.
# Refcodes used in earlier runs: 'CAXZEG', 'ADAGES'.
csd_reader = io.EntryReader('/Applications/CCDC/CSD_2022/DATA/CSD_543/as543be_ASER.sqlite')

entry_name = 'AHUNEW'
entry = csd_reader.entry(entry_name)
crystal = entry.crystal

# One "label: value" line per crystal property of interest.
print('SMILES:', crystal.molecule.smiles)
print('Crystal System:', crystal.crystal_system)
print('Spacegroup Symbol:', crystal.spacegroup_symbol)
print('Spacegroup Number:', crystal.spacegroup_number_and_setting)
print('Has disorder:', crystal.has_disorder)
print('Disorder details:', entry.disorder_details)


# For every symmetry operator, list the atoms reported on special positions;
# the operator is left-aligned in a 17-character column.
special_position_rows = [
    '{!s:<17} {!s}'.format(op, utilities.print_set(crystal.atoms_on_special_positions(op)))
    for op in crystal.symmetry_operators
]
print('\n'.join(special_position_rows))

# Serialise the crystal as CIF text; keep the string for the next cell and
# also drop a copy on disk.
cif_string = crystal.to_string(format='cif')
with open('tmp.cif', 'w') as cif_file:
    cif_file.write(cif_string)


SMILES: None
Crystal System: tetragonal
Spacegroup Symbol: P4/ncc
Spacegroup Number: (130, 2)
Has disorder: False
Disorder details: None
x,y,z             {Atom(C1), Atom(C1C), Atom(C1H), Atom(C1K), Atom(C2), Atom(C2C), Atom(C2H), Atom(C2K), Atom(C3), Atom(C3C), Atom(C3H), Atom(C3K), Atom(C4), Atom(C4C), Atom(C4H), Atom(C4K), Atom(C5), Atom(C5C), Atom(C5H), Atom(C5K), Atom(Cu1), Atom(Cu1N), Atom(Cu1N), Atom(F1), Atom(F2), Atom(F2A), Atom(F2B), Atom(F2G), Atom(F3), Atom(H1), Atom(H1C), Atom(H1H), Atom(H1K), Atom(H2), Atom(H2C), Atom(H2H), Atom(H2K), Atom(H3), Atom(H3C), Atom(H3H), Atom(H3K), Atom(H4), Atom(H4C), Atom(H4H), Atom(H4K), Atom(N1), Atom(N1C), Atom(N1F), Atom(N1G), Atom(N1H), Atom(N1K), Atom(O1), Atom(O1F), Atom(Ti1)}
1/2-y,x,z         {Atom(F1), Atom(F3), Atom(Ti1)}
y,1/2-x,z         {Atom(F1), Atom(F3), Atom(Ti1)}
1/2+y,1/2+x,1/2-z set()
1/2+x,-y,1/2-z    set()
-x,1/2+y,1/2-z    set()
-y,-x,1/2-z       set()
1/2-x,1/2-y,z     {Atom(F1), Atom(F3), Atom(Ti1)}
-x,-y,-z        

Load pymatgen structure from .cif

In [64]:
from wren_code import utils
from pymatgen.core import Composition, Structure
from pymatgen.io.cif import CifParser, CifFile

class CifStringParser(CifParser):
    """CifParser variant that reads CIF text held in memory instead of a file.

    ``__init__`` does not call ``CifParser.__init__``; it builds the parser
    state itself from the CIF string via ``CifFile.from_string``.

    NOTE(review): the set-up below appears to mirror the parent constructor
    of the pymatgen version in use -- confirm it stays in sync on upgrade,
    and check whether that version already ships a string-based constructor
    that would make this subclass unnecessary.
    """

    def __init__(self, cif_string, occupancy_tolerance=1.0, site_tolerance=1e-4):
        """
        Args:
            cif_string (str): Complete text of a CIF document.
            occupancy_tolerance (float): If total occupancy of a site is between 1
                and occupancy_tolerance, the occupancies will be scaled down to 1.
            site_tolerance (float): This tolerance is used to determine if two
                sites are sitting in the same position, in which case they will be
                combined to a single disordered site. Defaults to 1e-4.

        Raises:
            TypeError: If ``cif_string`` is not a ``str``.
        """
        # Validate first so a bad argument never leaves a half-built parser.
        if not isinstance(cif_string, str):
            raise TypeError('cif_string needs to be a string!')

        self._occupancy_tolerance = occupancy_tolerance
        self._site_tolerance = site_tolerance
        self._cif = CifFile.from_string(cif_string)
        # store if CIF contains features from non-core CIF dictionaries
        # e.g. magCIF
        self.feature_flags = {}
        self.warnings = []

        def has_dataname_containing(fragments):
            """Return True if any data name in any CIF block contains a fragment."""
            return any(
                fragment in dataname
                for block in self._cif.data.values()
                for dataname in block.data.keys()
                for fragment in fragments
            )

        # Heuristic: there doesn't seem to be a canonical way to test whether
        # a file is magCIF, so look for magnetic symmetry data names instead.
        self.feature_flags["magcif"] = has_dataname_containing(
            ("_space_group_magn", "_atom_site_moment", "_space_group_symop_magn")
        )

        # Heuristic: an incommensurate magnetic structure is only considered
        # for files already flagged as magCIF, and is detected through the
        # common modulation / wave-vector data names.
        self.feature_flags["magcif_incommensurate"] = (
            self.feature_flags["magcif"]
            and has_dataname_containing(
                ("_cell_modulation_dimension", "_cell_wave_vector")
            )
        )

        # pass individual CifBlocks to _sanitize_data; only existing keys are
        # reassigned, so the mapping's size does not change while iterating.
        for key, block in self._cif.data.items():
            self._cif.data[key] = self._sanitize_data(block)
            
# Parse the in-memory CIF text and keep the first structure returned.
# Alternatives used in earlier runs:
#   CifParser('tmp.cif', occupancy_tolerance=10).get_structures()[0]
#   Structure.from_str(cif_string, fmt="cif", merge_tol=0.1)
structures = CifStringParser(cif_string, occupancy_tolerance=10).get_structures()
struct = structures[0]

# Derive the label for the parsed structure and show it.
wyckoff_label = utils.get_aflow_label_spglib(struct)
print(wyckoff_label)

A20BC6D16E4F2G_tP200_130_5g_a_2cg_4g_g_f_c:C-Cu-F-H-N-O-Ti
