In [143]:
import os
import numpy as np

import scipy as sp
import h5py
import json

import matplotlib.pyplot as plt
import sisl
from sisl.io import *

from utils import draw_sub_H

In [169]:

def get_data_from_siesta(interface, input_file, input_path, output_path, verbose=True):
    """Export geometry, basis information and H/S blocks of a SIESTA/TranSIESTA run.

    Reads the geometry from ``input_file`` and the Hamiltonian/overlap from the
    ``<system>.HSX`` (``interface='siesta'``) or ``<system>.TSHS``
    (``interface='transiesta'``) file found in ``input_path``, then writes the
    following files into ``output_path``:

    * ``rlat.dat``, ``lat.dat``   - transposed reciprocal / real-space cell
    * ``element.dat``             - atomic numbers
    * ``site_positions.dat``      - Cartesian atom coordinates
    * ``info.json``               - nsites, isorthogonal, isspinful, norbits
    * ``num_orbital_per_atom.dat``
    * ``unique_isc.dat``          - distinct supercell offsets from ``<system>.ORB_INDX``
    * ``orbital_types.dat``       - per-atom list taken from ``<system>.ORB_INDX``
    * ``hamiltonians.h5``, ``overlaps.h5`` - one dataset per
      ``'[R0, R1, R2, atom_i, atom_j]'`` key

    Parameters
    ----------
    interface : str
        ``'siesta'`` or ``'transiesta'``.
    input_file : str
        Name of the geometry file inside ``input_path`` (e.g. an fdf file).
    input_path : str
        Directory holding the raw calculation output.
    output_path : str
        Directory for the processed files; created if it does not exist.
    verbose : bool, optional
        Print every block key the first time it is encountered (default True,
        which matches the historical behaviour).

    Raises
    ------
    ValueError
        If ``interface`` is not one of the supported values.
    FileNotFoundError
        If ``input_path`` contains no matrix file for the chosen interface.
    """
    # Validate before touching the file system so that a typo in `interface`
    # fails immediately instead of surfacing later as an unbound `hsx`.
    matrix_extensions = {'siesta': 'HSX', 'transiesta': 'TSHS'}
    if interface not in matrix_extensions:
        raise ValueError(
            "Unknown interface {!r}; expected one of {}".format(
                interface, sorted(matrix_extensions)))
    extension = matrix_extensions[interface]

    input_path = os.path.abspath(input_path)
    output_path = os.path.abspath(output_path)
    os.makedirs(output_path, exist_ok=True)

    # The system label is the part of the matrix file name before the first dot.
    marker = "." + extension
    candidates = [name for name in os.listdir(input_path) if marker in name]
    if not candidates:
        raise FileNotFoundError(
            "No *{} file found in {}".format(marker, input_path))
    system_name = candidates[0].split(".")[0]

    # Read the structure and open the Hamiltonian/overlap file.
    geom_str = os.path.join(input_path, input_file)
    ham_str = os.path.join(input_path, f"{system_name}.{extension}")
    geom = sisl.get_sile(geom_str).read_geometry()
    if interface == 'siesta':
        hsx = hsxSileSiesta(ham_str)
    else:
        hsx = tshsSileSiesta(ham_str)
    H = hsx.read_hamiltonian(geometry=geom)
    S = hsx.read_overlap(geometry=geom)

    # Lattice vectors, atomic numbers, Cartesian coordinates, reciprocal cell.
    lattice = geom.lattice.cell
    atomic_numbers = geom.atoms.Z
    atom_coord_cart = geom.xyz
    rlattice = geom.lattice.rcell

    # Both cells are written transposed.
    np.savetxt(os.path.join(output_path, 'rlat.dat'), np.transpose(rlattice), fmt='%.8e')
    np.savetxt(os.path.join(output_path, 'lat.dat'), np.transpose(lattice), fmt='%.8e')
    np.savetxt(os.path.join(output_path, 'element.dat'), atomic_numbers, fmt='%d')
    np.savetxt(os.path.join(output_path, 'site_positions.dat'), atom_coord_cart)

    # Basis summary: atom count, orthogonality, spin polarisation, orbital count.
    num_atoms = H.na
    info = {
        'nsites': num_atoms,
        'isorthogonal': bool(H.orthogonal),
        'isspinful': bool(H.spin.is_polarized),
        'norbits': int(H.no),
    }
    with open(os.path.join(output_path, 'info.json'), 'w') as info_f:
        json.dump(info, info_f)

    np.savetxt(os.path.join(output_path, 'num_orbital_per_atom.dat'), geom.orbitals, fmt='%d')

    # NOTE(review): 3 leading and 17 trailing lines are skipped, presumably the
    # header and the legend of the ORB_INDX file - confirm for your SIESTA version.
    orb_indx = np.genfromtxt(
        os.path.join(input_path, '{}.ORB_INDX'.format(system_name)),
        skip_header=3, skip_footer=17)

    # Distinct supercell offsets in order of first appearance. Columns 12..14
    # are taken to be the offsets (assumes the standard ORB_INDX layout).
    unique_isc = []
    seen_isc = set()
    for row in orb_indx[:, 12:15]:
        isc = tuple(int(component) for component in row)
        if isc not in seen_isc:
            seen_isc.add(isc)
            unique_isc.append(isc)
    np.savetxt(os.path.join(output_path, 'unique_isc.dat'), unique_isc, fmt='%d')

    # For every atom (1-based index in column 1) keep the column-6 value of each
    # distinct (column 6, column 8) pair, in order of first appearance.
    # Presumably these are the angular momentum l and the zeta index - TODO confirm.
    atom_l_z = orb_indx[:, [1, 6, 8]]
    orbital_type = []
    for atom in range(1, num_atoms + 1):
        shells = []
        seen_shells = set()
        for _, l_value, z_value in atom_l_z[atom_l_z[:, 0] == atom]:
            shell = (int(l_value), int(z_value))
            if shell not in seen_shells:
                seen_shells.add(shell)
                shells.append(shell[0])
        orbital_type.append(shells)
    # np.savetxt needs a rectangular table, i.e. the same number of shells per atom.
    np.savetxt(os.path.join(output_path, 'orbital_types.dat'), orbital_type, fmt='%d')

    # Collect one H and one S block per (supercell offset, atom_i, atom_j) key,
    # visiting the non-zero elements of H and keeping the first hit per key.
    H_block_matrix = dict()
    S_block_matrix = dict()
    for i, j in H.iter_nnz():
        a_i = H.o2a(orbitals=i, unique=True)
        b_j = H.o2a(orbitals=j, unique=True)
        uc_b_j = H.asc2uc(atoms=b_j)
        isc = H.a2isc(atoms=b_j)
        key = '[{}, {}, {}, {}, {}]'.format(isc[0], isc[1], isc[2], a_i, uc_b_j)
        if key in H_block_matrix:
            continue
        if verbose:
            print(key)
        # NOTE(review): the [:9, 9:18] slice hard-codes 9 orbitals per atom and
        # does not depend on `isc`; verify it selects the intended block for
        # other basis sizes and for periodic images.
        H_block_matrix[key] = H.sub([a_i, b_j]).tocsr().toarray()[:9, 9:18]
        S_block_matrix[key] = S.sub([a_i, b_j]).tocsr().toarray()[:9, 9:18]

    # Context managers guarantee the HDF5 files are closed even if a write fails.
    with h5py.File(os.path.join(output_path, 'hamiltonians.h5'), 'w') as h5_file:
        for key, block in H_block_matrix.items():
            h5_file[key] = block

    with h5py.File(os.path.join(output_path, 'overlaps.h5'), 'w') as h5_file:
        for key, block in S_block_matrix.items():
            h5_file[key] = block
    # return H, H_block_matrix, S_block_matrix
    
# Run configuration for one TranSIESTA calculation (0.0 V bias directory).
interface = "transiesta"
input_file = "RUN.fdf"
input_path = "/fs2/home/ndsim10/all-kinds-test/graphene-defect-transport/Pristine-armchair-168/0.0V/"
output_path = "/fs2/home/ndsim10/all-kinds-test/graphene-defect-transport/Pristine-armchair-168/0.0V/processed/"

# Convert the raw output into the processed dataset files.
get_data_from_siesta(
    interface=interface,
    input_file=input_file,
    input_path=input_path,
    output_path=output_path,
)

[0, 0, 0, 0, 0]
[0, 0, 0, 0, 1]
[0, 0, 0, 0, 2]
[0, 0, 0, 0, 6]
[0, 0, 0, 0, 7]
[0, 0, 0, 0, 8]
[0, 0, 0, 0, 12]
[0, 0, 0, 0, 13]
[0, 0, 0, 0, 14]
[0, 0, 0, 0, 18]
[0, 0, 0, 0, 19]
[0, 0, 0, 0, 24]
[0, 0, 0, 0, 25]
[0, 0, 0, 0, 30]
[0, 0, 0, 0, 31]
[-1, 0, -1, 0, 149]
[-1, 0, -1, 0, 155]
[-1, 0, -1, 0, 161]
[-1, 0, -1, 0, 166]
[-1, 0, -1, 0, 167]
[0, 0, -1, 0, 4]
[0, 0, -1, 0, 5]
[0, 0, -1, 0, 11]
[0, 0, -1, 0, 17]
[0, 0, -1, 0, 23]
[0, 0, -1, 0, 29]
[-1, 0, 0, 0, 144]
[-1, 0, 0, 0, 145]
[-1, 0, 0, 0, 150]
[-1, 0, 0, 0, 151]
[-1, 0, 0, 0, 152]
[-1, 0, 0, 0, 156]
[-1, 0, 0, 0, 157]
[-1, 0, 0, 0, 158]
[-1, 0, 0, 0, 162]
[-1, 0, 0, 0, 163]
[-1, 0, 0, 0, 164]
[0, 0, 0, 1, 0]
[0, 0, 0, 1, 1]
[0, 0, 0, 1, 2]
[0, 0, 0, 1, 3]
[0, 0, 0, 1, 6]
[0, 0, 0, 1, 7]
[0, 0, 0, 1, 8]
[0, 0, 0, 1, 9]
[0, 0, 0, 1, 12]
[0, 0, 0, 1, 13]
[0, 0, 0, 1, 14]
[0, 0, 0, 1, 15]
[0, 0, 0, 1, 18]
[0, 0, 0, 1, 19]
[0, 0, 0, 1, 20]
[0, 0, 0, 1, 24]
[0, 0, 0, 1, 25]
[0, 0, 0, 1, 26]
[0, 0, 0, 1, 31]
[0, 0, 0, 1, 32]
[-1,

In [None]:
# Heat map of a single block, selected by its "[R0, R1, R2, i, j]" key.
temp = H_block_sparse["[-1, 0, 0, 104, 102]"]

fig, ax = plt.subplots()
image = ax.imshow(temp, cmap='viridis')
fig.colorbar(image, ax=ax)
ax.set_xlabel('orb')
ax.set_ylabel('orb')

plt.show()

In [None]:
# Preview the first two Hamiltonian blocks as heat maps.
for shown, Rijkab in enumerate(H_block_sparse.keys(), start=1):
    data = H_block_sparse[Rijkab]
    s = "hamiltonians " + str(Rijkab) + " " + str(data.shape)

    fig, ax = plt.subplots()
    image = ax.imshow(data, cmap='viridis')
    fig.colorbar(image, ax=ax)
    ax.set_xlabel('orb')
    ax.set_ylabel('orb')
    ax.set_title(s)
    plt.show()

    # Two figures are enough for a visual sanity check.
    if shown == 2:
        break

In [None]:
# Preview the first two overlap blocks as heat maps.
i=0
for Rijkab in S_block_sparse.keys():
    data = S_block_sparse[Rijkab]
    s = str(data.shape)
    # These are overlap blocks; the title previously said "hamiltonians"
    # because the cell was copied from the Hamiltonian preview.
    s = "overlaps " + str(Rijkab) + " " + s
    plt.imshow(data, cmap='viridis')
    plt.colorbar()
    plt.xlabel('orb')
    plt.ylabel('orb')
    plt.title(s)
    plt.show()
    i=i+1
    # Two figures are enough for a visual sanity check.
    if i==2:
        break

In [None]:
# Number of orbitals per atom: every shell with angular momentum l adds 2l+1.
orbitals_per_atom = np.sum(2*orb_types + 1, axis=1)
print(orbitals_per_atom)
cumulative_orbitals = np.cumsum(orbitals_per_atom)
print(cumulative_orbitals)

# Index of the first orbital of each atom. Derived from the actual orbital
# counts instead of assuming 9 orbitals per atom.
first_orbital = cumulative_orbitals - orbitals_per_atom

# Group the orbital-resolved entries of H_R by (R0, R1, R2, atom_i, atom_j).
# H_R keys are parsed as five comma-separated integers; the last two are used
# as 0-based orbital indices.
block_entries = {}
for key in H_R.keys():
    value = H_R[key]
    fields = list(map(int, key.split(',')))

    # searchsorted on the running total maps an orbital index to its atom.
    atom_i = np.searchsorted(cumulative_orbitals, fields[3]+1)
    atom_j = np.searchsorted(cumulative_orbitals, fields[4]+1)

    # Position of the orbital inside its atom.
    u = fields[3] - first_orbital[atom_i]
    v = fields[4] - first_orbital[atom_j]

    block_key = "[{},{},{},{},{}]".format(fields[0], fields[1], fields[2], atom_i, atom_j)
    block_entries.setdefault(block_key, (atom_i, atom_j, []))[2].append((u, v, value))

# Turn every group into a dense block and store it under its OWN key.
# Previously the key was rebuilt from atom_i/atom_j left over from the loop
# above, so all blocks overwrote a single entry.
H_block_connect = {}
for block_key, (atom_i, atom_j, entries) in block_entries.items():
    hij = np.zeros((orbitals_per_atom[atom_i], orbitals_per_atom[atom_j]), dtype=float)
    for u, v, value in entries:
        hij[int(u), int(v)] = value
    H_block_connect[block_key] = hij

# print(H_block_connect)

print(len(H_block_connect))

In [None]:
# Re-key the entries of H_block_connect as "[R0,R1,R2,atom_i,atom_j,u,v]".
H_block = {}
for key in H_block_connect.keys():
    value = H_block_connect[key]
    # H_block_connect keys are bracketed ("[a,b,c,i,j]"); strip the brackets
    # before parsing, otherwise int("[a") raises ValueError.
    key = list(map(int, key.strip("[]").split(',')))
    print(key)
    # NOTE(review): key[3] and key[4] are treated as orbital indices here, but
    # the cell that builds H_block_connect stores atom indices in those
    # positions - confirm which dictionary this cell is meant to iterate.
    atom_i = np.searchsorted(cumulative_orbitals, key[3]+1)
    atom_j = np.searchsorted(cumulative_orbitals, key[4]+1)
    print(atom_i, atom_j)
    atom_orbitals_i = orbitals_per_atom[atom_i]
    atom_orbitals_j = orbitals_per_atom[atom_j]

    # Offset inside the atom, using the atom's real first-orbital index rather
    # than a hard-coded 9 orbitals per atom.
    u = key[3] - (cumulative_orbitals - orbitals_per_atom)[atom_i]
    v = key[4] - (cumulative_orbitals - orbitals_per_atom)[atom_j]
    print(u, v)

    H_block["[{},{},{},{},{},{},{}]".format(key[0], key[1], key[2], atom_i, atom_j, u, v)] = value

In [None]:
# Worked example: 33 atoms carrying 9 orbitals each.
orbitals_per_atom = np.full(33, 9)

# Running total of orbitals up to and including each atom (9, 18, ..., 297).
cumulative_orbitals = np.cumsum(orbitals_per_atom)

# 0-based index of the atom that owns the 265th orbital.
index = np.searchsorted(cumulative_orbitals, 265)

# Number of orbitals on that atom.
atom_orbitals = orbitals_per_atom[index]

print(f"第265个轨道属于第 {index + 1} 个原子，该原子有 {atom_orbitals} 个轨道。")