In [None]:
import torch
from collections import defaultdict
import os

def analyze_samples_pt(file_path, output_dir="analyzed_results"):
    """
    Inspect a ``.pt`` file and print a summary of what it contains.

    The file is expected to hold a dictionary. Every key is listed with the
    type of its value; the ``"mofs"`` and ``"z"`` entries, when present, get
    extra detail, and ``"z"`` is additionally re-saved on its own.

    Args:
        file_path: Path of the ``.pt`` file to inspect.
        output_dir: Directory that receives ``latent_vectors.pt``. It is only
            created when the file actually contains a ``"z"`` entry.

    Returns:
        None. All findings are printed.
    """
    if not os.path.exists(file_path):
        print(f"文件 {file_path} 不存在，请检查路径！")
        return

    print(f"加载文件：{file_path}")
    # SECURITY: torch.load unpickles the file, so only point this at files
    # from a trusted source.
    # map_location="cpu" lets a file written on a GPU machine be inspected on
    # a CPU-only one.
    data = torch.load(file_path, map_location="cpu")

    if not isinstance(data, dict):
        print(f"文件内容不是字典类型，而是 {type(data)}")
        return

    print("文件内容为字典，包含以下键：")
    for key, value in data.items():
        print(f"- {key}: 数据类型 {type(value)}")

    # Detail for the individual entries
    if "mofs" in data:
        mofs = data["mofs"]
        print(f"'mofs' 包含 {len(mofs)} 个 MOF 数据条目。")
        if len(mofs) > 0:
            first_mof = mofs[0]
            print(f"第一个 MOF 的内容类型为：{type(first_mof)}")
            print(f"第一个 MOF 的内容示例：{first_mof}")
            # The entry type is not known here; it may not be subscriptable or
            # may lack this field, and that should not abort the inspection.
            try:
                coords = first_mof['all_atom_coords']
            except (KeyError, IndexError, TypeError, AttributeError):
                print("第一个 MOF 不包含 'all_atom_coords' 字段。")
            else:
                print(f"第一个 MOF ：{coords}")

    if "z" in data:
        latent = data["z"]
        print(f"'z' 是潜在空间向量（latent vectors），形状为 {latent.shape}。")
        print(f"前几个 'z' 向量：{latent[:3]}")

        # Persist the latent vectors; the directory is created only now so
        # that files without "z" do not leave an empty folder behind.
        os.makedirs(output_dir, exist_ok=True)
        torch.save(latent, os.path.join(output_dir, "latent_vectors.pt"))
        print(f"已保存 'z' 到 {output_dir}/latent_vectors.pt")

if __name__ == "__main__":
    # Change this to the path of your own samples file
    file_path = "samples_128_seed_42/samples.pt"
    analyze_samples_pt(file_path)


In [None]:
import os
import shutil

def copy_and_rename_cif_files(folder_list, target_folder, seed_marker="samples_4096_seed_"):
    """
    Copy every ``.cif`` file from the ``cif`` sub-folder of each given folder
    into one target folder, appending the folder's seed to the file name.

    A file ``<name>.cif`` found under a folder whose path contains
    ``<seed_marker><seed>`` is copied to ``<target_folder>/<name>_<seed>.cif``.

    Args:
        folder_list: Iterable of folder paths; each path must contain
            ``seed_marker`` immediately followed by the seed value.
        target_folder: Destination folder; created if it does not exist.
        seed_marker: Text that precedes the seed value inside each path.

    Returns:
        None. Progress and warnings are printed.
    """
    # exist_ok avoids the check-then-create race of a separate exists() test
    os.makedirs(target_folder, exist_ok=True)

    for folder_path in folder_list:
        # Take the text right after the marker, cut at the first path
        # separator so a trailing "/" (or deeper components) cannot leak into
        # the generated file name.
        _, marker_found, seed_value = os.fspath(folder_path).partition(seed_marker)
        for separator in (os.sep, os.altsep):
            if separator:
                seed_value = seed_value.split(separator, 1)[0]

        if not marker_found or not seed_value:
            # Without a seed the renamed files would be ambiguous; skip the
            # folder instead of aborting the whole batch.
            print(f"警告: 无法从 {folder_path} 中提取种子值，已跳过。")
            continue

        cif_folder = os.path.join(folder_path, "cif")  # the cif sub-folder
        if not os.path.exists(cif_folder):
            print(f"警告: {cif_folder} 不存在。")
            continue

        # Sorted so the copy order (and the printed log) is deterministic
        for filename in sorted(os.listdir(cif_folder)):
            if not filename.endswith(".cif"):
                continue

            src_file_path = os.path.join(cif_folder, filename)

            # Build "<original stem>_<seed>.cif" inside the target folder
            base_filename = os.path.splitext(filename)[0]
            new_filename = f"{base_filename}_{seed_value}.cif"
            dst_file_path = os.path.join(target_folder, new_filename)

            # copy2 also preserves file metadata such as timestamps
            shutil.copy2(src_file_path, dst_file_path)
            print(f"复制: {src_file_path} -> {dst_file_path}")

# Seeds 1..87 inclusive; each seed has its own sample folder next to the others.
start_seed, end_seed = 1, 87
base_path = '/data/user2/wty/HOF/MOFDiff/mofdiff/data/mof_models/mof_models/bwdb_hoff/samples_4096_seed_'
folder_list = [base_path + str(seed) for seed in range(start_seed, end_seed + 1)]

# Single folder that collects the renamed CIF files of every seed.
target_folder = '/data/user2/wty/HOF/MOFDiff/mofdiff/data/mof_models/mof_models/bwdb_hoff/temp_all'

# Run the copy
copy_and_rename_cif_files(folder_list, target_folder)


In [None]:
from ase import Atoms
from ase.io import read
from ase.geometry import neighbor_list
import numpy as np

def detect_hydrogen_bonds(cif_file, max_distance=3.5, min_distance=2.4, min_angle=150, dh_cutoff=1.2):
    """
    Find potential hydrogen bonds in a crystal structure using ASE.

    A donor (O or N) that has a hydrogen within ``dh_cutoff`` is paired with
    every acceptor (O, N or F) whose donor-acceptor distance lies in
    ``[min_distance, max_distance]``. The pair is reported when the
    donor-H-acceptor angle, measured at the hydrogen, is at least
    ``min_angle``.

    Args:
        cif_file (str): Path of the CIF file.
        max_distance (float): Maximum donor-acceptor distance (Å).
        min_distance (float): Minimum donor-acceptor distance (Å).
        min_angle (float): Minimum donor-H-acceptor angle in degrees
            (180 corresponds to a perfectly linear bond).
        dh_cutoff (float): Maximum donor-hydrogen distance (Å) for the
            hydrogen to count as attached to the donor.

    Returns:
        list: One tuple per detected bond:
        (donor symbol, acceptor symbol, hydrogen symbol, distance, angle).
    """
    # neighbor_list is provided by ase.neighborlist; importing it here keeps
    # the function independent of how the surrounding cell imported it.
    from ase.neighborlist import neighbor_list

    # Read the structure
    structure = read(cif_file)

    # Chemical symbol of every atom
    symbols = structure.get_chemical_symbols()

    # Element types that may act as donor / acceptor
    potential_donors = ("O", "N")  # atoms that carry the hydrogen
    potential_acceptors = ("O", "N", "F")
    hydrogen = "H"

    # One short-range search for the whole structure: map each donor index to
    # its attached hydrogens. The "D" vectors point from the first atom to the
    # (possibly periodic-image) second atom, so they are correct across cell
    # boundaries.
    near_i, near_j, near_vec = neighbor_list("ijD", structure, cutoff=dh_cutoff)
    donor_hydrogens = {}
    for donor_index, h_index, donor_to_h in zip(near_i, near_j, near_vec):
        if symbols[donor_index] in potential_donors and symbols[h_index] == hydrogen:
            donor_hydrogens.setdefault(int(donor_index), []).append((int(h_index), donor_to_h))

    hydrogen_bonds = []
    if not donor_hydrogens:
        return hydrogen_bonds

    # Donor-acceptor candidates within max_distance
    indices_i, indices_j, distances, pair_vectors = neighbor_list(
        "ijdD", structure, cutoff=max_distance
    )

    for i, j, d, donor_to_acceptor in zip(indices_i, indices_j, distances, pair_vectors):
        if d < min_distance:
            continue
        attached = donor_hydrogens.get(int(i))
        if not attached or symbols[j] not in potential_acceptors:
            continue

        for h_index, donor_to_h in attached:
            # Angle at the hydrogen between H->donor and H->acceptor; a linear
            # D-H...A arrangement gives 180 degrees.
            h_to_donor = -donor_to_h
            h_to_acceptor = donor_to_acceptor - donor_to_h
            angle = calculate_angle(h_to_donor, h_to_acceptor)

            if angle >= min_angle:
                hydrogen_bonds.append(
                    (symbols[i], symbols[j], symbols[h_index], d, angle)
                )

    return hydrogen_bonds

def calculate_angle(vec1, vec2):
    """
    Return the angle between two vectors in degrees.

    Args:
        vec1: First vector (array-like).
        vec2: Second vector (array-like).

    Returns:
        The angle in degrees, in the range [0, 180]. NaN is returned when
        either vector has zero length, because the angle is undefined then.
    """
    norm_product = np.linalg.norm(vec1) * np.linalg.norm(vec2)
    if norm_product == 0:
        return np.nan

    # Rounding can push the cosine of (anti)parallel vectors marginally outside
    # [-1, 1], which would make arccos return NaN; clamp it first.
    cos_theta = np.clip(np.dot(vec1, vec2) / norm_product, -1.0, 1.0)
    return np.degrees(np.arccos(cos_theta))


# Example: run the hydrogen-bond detector on one generated sample
cif_file = "/data/user2/wty/HOF/MOFDiff/mofdiff/data/mof_models/mof_models/bwdb_hoff/samples_4096_seed_8/cif/sample_57.cif"
hydrogen_bonds = detect_hydrogen_bonds(cif_file)

if not hydrogen_bonds:
    print("未检测到氢键。")
else:
    print(f"检测到 {len(hydrogen_bonds)} 个潜在氢键：")
    # One line per bond: elements involved, distance and angle
    for donor, acceptor, hydrogen, distance, angle in hydrogen_bonds:
        print(f"供体: {donor}, 受体: {acceptor}, 氢: {hydrogen}, 距离: {distance:.2f} Å, 角度: {angle:.2f}°")


In [None]:
from pymatgen.core import Structure
from pymatgen.analysis.local_env import CrystalNN

# Load the CIF file
structure = Structure.from_file("/data/user2/wty/HOF/MOFDiff/mofdiff/data/mof_models/mof_models/bwdb_hoff/samples_4096_seed_8/cif/sample_57.cif")

# Near-neighbour finder used to enumerate the environment of each hydrogen
cnn = CrystalNN()

# Maximum H-to-acceptor distance (Å) for a contact to be reported
hbond_distance_threshold = 3.5

# Walk over every site; enumerate supplies the index get_nn_info expects
for i, site in enumerate(structure):
    # Only hydrogen atoms are of interest as the bridging atom
    if site.specie.symbol == "H":
        neighbors = cnn.get_nn_info(structure, i)
        for neighbor in neighbors:
            # Keep neighbours that can act as hydrogen-bond acceptors
            if neighbor["site"].specie.symbol in ["O", "N", "F"]:
                # neighbor["weight"] is CrystalNN's coordination weight, not a
                # length, so measure the separation to the neighbour's actual
                # (periodic-image) position instead.
                # NOTE(review): the atom covalently bonded to this H is also a
                # CrystalNN neighbour and will be reported as well — confirm
                # whether a lower distance bound is wanted.
                distance = site.distance_from_point(neighbor["site"].coords)
                # Report the contact when it is within the threshold
                if distance <= hbond_distance_threshold:
                    print(f"检测到氢键：H原子在位置 {site.frac_coords} 与 {neighbor['site'].specie.symbol} 原子在位置 {neighbor['site'].frac_coords}，距离为 {distance:.2f} Å")


In [None]:
from openbabel import openbabel as ob
import numpy as np
import re
from pathlib import Path
import argparse
import pickle
import pandas as pd
import sys
import os
# `__file__` is not defined when this code runs inside a Jupyter kernel, so
# fall back to the current working directory as the anchor.
# NOTE(review): assumes the notebook is started from the directory the
# original script lived in, two levels below the project root — confirm.
_anchor_dir = os.path.dirname(os.path.abspath(__file__)) if "__file__" in globals() else os.getcwd()
sys.path.insert(0, os.path.abspath(os.path.join(_anchor_dir, '..', '..')))
from mofdiff.common.constants import COVALENT_RADII

def has_no_overlapping_atoms(cif_path, threshold=0.7):
    """
    Report whether a CIF file is free of overlapping atoms.

    The structure is split into connected fragments and, inside each
    fragment, two atoms count as overlapping when their distance is below
    ``threshold`` times the smaller of their two covalent radii.

    :param cif_path: Path of the CIF file.
    :param threshold: Fraction of the smaller covalent radius below which two
        atoms are considered overlapping. Defaults to 0.7.
    :return: True when no overlap is found; False when an overlap is found or
        the file cannot be read.
    """
    print(f"Checking {cif_path} for overlapping atoms.")
    obConversion = ob.OBConversion()
    obConversion.SetInFormat("cif")
    mol = ob.OBMol()

    # ReadFile needs a plain string; fspath also accepts pathlib.Path input
    if not obConversion.ReadFile(mol, os.fspath(cif_path)):
        print(f"Failed to read {cif_path} file.")
        return False

    # Split into connected fragments; atoms are only compared with atoms of
    # their own fragment.
    # NOTE(review): overlaps between atoms that end up in different fragments
    # are therefore not detected — confirm this is intended.
    for frag in mol.Separate():
        frag_mol = ob.OBMol(frag)
        seen = []  # (position, covalent radius) of atoms already visited

        for atom in ob.OBMolAtomIter(frag_mol):
            # Look the radius up by element symbol. GetType() returns Open
            # Babel atom types (e.g. "Car", "Nam") that do not match the table
            # even with digits stripped, which silently skipped those atoms.
            # presumably COVALENT_RADII is keyed by element symbol — confirm.
            symbol = ob.GetSymbol(atom.GetAtomicNum())
            try:
                radius = COVALENT_RADII[symbol]
            except KeyError:
                # Element without a tabulated radius: it cannot take part in
                # any comparison, so leave it out.
                continue

            pos = np.array([atom.GetX(), atom.GetY(), atom.GetZ()])
            for other_pos, other_radius in seen:
                if np.linalg.norm(pos - other_pos) < threshold * min(radius, other_radius):
                    return False  # overlapping pair found

            seen.append((pos, radius))

    return True  # no overlapping atoms

# Example: check one generated sample for overlapping atoms
cif_path = "/data/user2/wty/HOF/MOFDiff/mofdiff/data/mof_models/mof_models/bwdb_hoff/samples_4096_seed_8/cif/sample_57.cif"
print(has_no_overlapping_atoms(cif_path))