In [12]:
# Create a database that stores structure metadata

In [1]:
from sqlalchemy import create_engine, Column, Integer, String, Sequence
from sqlalchemy.orm import declarative_base
import uuid
from sqlalchemy.orm import sessionmaker
import os
import shutil
from mlptools.io.read import read_from_format
import pickle

# Directory that holds the SQLite database file (structure.db).
path2db = "/Users/y1u0d2/desktop/Lab/data/qe_data/Si"

# Declarative base class from which the ORM model below inherits.
Base = declarative_base()

class Structure(Base):
    """ORM model for one row of the ``structure`` table.

    Each row records the metadata of a single imported structure directory.
    """
    __tablename__ = 'structure'
    # Primary key; the import loop in this notebook fills it with a UUID4 string.
    id = Column(String, primary_key=True)
    # Path of the source directory; unique, so the same path cannot be inserted twice.
    original_path = Column(String, unique=True)
    # Identifier passed in as ``mpid`` by the import loop (None when not set).
    # NOTE(review): declared Integer, but get_mpid_from_path returns strings such as
    # "mp-123" -- confirm the intended column type.
    structure_id = Column(Integer)
    # Free-form label for the structure (e.g. "amorphous").
    structure_name = Column(String)
    # Calculation kind; the import loop distinguishes "scf" and "relax".
    calculation_type = Column(String)

# Create the database engine and initialize the tables.
# The SQLite file structure.db is located directly under path2db.
engine = create_engine(f'sqlite:///{path2db}/structure.db')
# Create the tables for every model registered on Base.
Base.metadata.create_all(engine)

# Add data

In [2]:
from glob import glob
import re

def get_mpid_from_path(path):
    """Return the first ``mp-<digits>`` token found in ``path``.

    Args:
        path: String to search.

    Returns:
        The matched substring (e.g. ``"mp-149"``), or ``None`` when ``path``
        contains no such token.
    """
    found = re.search(r"mp-\d+", path)
    return found.group(0) if found else None

def generate_unique_uuid(session):
    """Generate a UUID4 string that is not yet used as a ``Structure.id``.

    Args:
        session: Session used to query the ``Structure`` table.

    Returns:
        A UUID4 string for which no ``Structure`` row with that id was found.
    """
    candidate = str(uuid.uuid4())
    # Keep drawing new UUIDs for as long as the current one is already taken.
    while session.query(Structure).filter_by(id=candidate).first():
        candidate = str(uuid.uuid4())
    return candidate

In [3]:
# Source location, destination root and metadata labels for this import batch.
# path2scf = "/Users/y1u0d2/desktop/Lab/result/qe/SiO2/mp-dimer/Si-O/spin/result"
path2scf = "/Users/y1u0d2/Google Drive/マイドライブ/HamaLab_kotani/QE/Si/amorphous/relax/export"
path2atoms = "/Users/y1u0d2/desktop/Lab/data/qe_data/Si/atoms"
calculation_type, structure_name = "relax", "amorphous"
mpid = None

# Collect every candidate directory matching the export layout.
# Alternative pattern: glob(f'{path2scf}/*')
all_scf_dirs = list(glob(f'{path2scf}/relax_*/atoms/atoms_*'))
print(f"Number of candidate atoms: {len(all_scf_dirs)}")

Number of candidate atoms: 5843


In [4]:
# Register every candidate directory in the structure DB and copy its files
# into a directory named after the new row's UUID under path2atoms.
#
# For each directory:
#   1. Load the cached atoms.pkl, or build it with read_from_format and cache it.
#      Directories that fail this step are reported and skipped (not deleted).
#   2. Insert a Structure row, copy the files, and only then commit, so a failed
#      copy never leaves a committed row without its files.
Session = sessionmaker(bind=engine)
session = Session()

try:
    for i, scf_dir in enumerate(all_scf_dirs):
        print(f"Processing {i+1}/{len(all_scf_dirs)}")
        # Validate the directory.
        path2pkl = os.path.join(scf_dir, "atoms.pkl")
        try:
            if os.path.exists(path2pkl):
                print(f"atoms.pkl already exists {scf_dir}")
                # NOTE(review): pickle.load can execute arbitrary code; only load
                # files from trusted locations.
                # The context manager closes the handle; a bare open() leaked one
                # file descriptor per directory.
                with open(path2pkl, 'rb') as f:
                    atoms = pickle.load(f)
            else:
                atoms = read_from_format(
                    path2target=scf_dir,
                    format='espresso-in',
                    structure_id=mpid
                )
                with open(path2pkl, 'wb') as f:
                    pickle.dump(atoms, f)
                print(f"Successfully created atoms.pkl {scf_dir}")
        except Exception as e:
            print(e)
            print(f"[Error]: Invalid scf directory {scf_dir}")
            continue

        # Add to the DB.
        unique_id = generate_unique_uuid(session)
        path2target = os.path.join(path2atoms, unique_id)
        target_created = False
        try:
            structure = Structure(
                id=unique_id,
                original_path=scf_dir,
                structure_id=mpid,
                structure_name=structure_name,
                calculation_type=calculation_type
            )
            session.add(structure)
            # Flush emits the INSERT now, so a constraint violation (e.g. a duplicate
            # original_path) is raised before any file is copied.
            session.flush()

            # Create the target directory and copy the files for this calculation type.
            os.makedirs(path2target)
            target_created = True
            if calculation_type == "scf":
                shutil.copy(os.path.join(scf_dir, "scf.in"), path2target)
                shutil.copy(os.path.join(scf_dir, "scf.out"), path2target)
                shutil.copy(os.path.join(scf_dir, "atoms.pkl"), path2target)
            elif calculation_type == "relax":
                shutil.copy(os.path.join(scf_dir, "atoms.pkl"), path2target)

            # Commit only after the files are in place so the DB row and the
            # directory on disk cannot get out of sync.
            session.commit()
            print(f"[DB COMMIT SUCCESS]: {path2target}")
        except Exception as e:
            print(f"[Error]: {e}")
            session.rollback()
            # Remove a partially populated directory, but only if this iteration
            # created it.
            if target_created:
                shutil.rmtree(path2target, ignore_errors=True)
finally:
    # Always release the session, even if the loop is interrupted.
    session.close()

Processing 1/5843
atoms.pkl already exists /Users/y1u0d2/Google Drive/マイドライブ/HamaLab_kotani/QE/Si/amorphous/relax/export/relax_4184680c-3130-4a29-9fc0-7f18947a26ca/atoms/atoms_11
[DB COMMIT SUCCESS]: /Users/y1u0d2/desktop/Lab/data/qe_data/Si/atoms/226a7c1c-4463-48e0-8d9c-3ed7a25d80a9
Processing 2/5843
atoms.pkl already exists /Users/y1u0d2/Google Drive/マイドライブ/HamaLab_kotani/QE/Si/amorphous/relax/export/relax_4184680c-3130-4a29-9fc0-7f18947a26ca/atoms/atoms_16
[DB COMMIT SUCCESS]: /Users/y1u0d2/desktop/Lab/data/qe_data/Si/atoms/0d9441e6-d921-4631-948a-02d9d0dced9c
Processing 3/5843
atoms.pkl already exists /Users/y1u0d2/Google Drive/マイドライブ/HamaLab_kotani/QE/Si/amorphous/relax/export/relax_4184680c-3130-4a29-9fc0-7f18947a26ca/atoms/atoms_29
[DB COMMIT SUCCESS]: /Users/y1u0d2/desktop/Lab/data/qe_data/Si/atoms/d3b375b5-c981-45a9-8935-1a2b215abbb7
Processing 4/5843
atoms.pkl already exists /Users/y1u0d2/Google Drive/マイドライブ/HamaLab_kotani/QE/Si/amorphous/relax/export/relax_4184680c-3