# 处理原始数据

In [1]:
import os
import numpy as np
import pickle

In [2]:
# Read the particle file: two header lines followed by one row per particle.
# Columns 0-2 of each row are stored in `particle`, column 3 in `signature`.
filepath = os.path.join('data','spsi_cutoff_25_1.txt')
particle = []
signature = []
with open(filepath) as file:
    # Iterate the file object directly instead of loading every line up front.
    for lines in file:
        line = lines.split()
        if not line:
            # A blank line would yield an empty row and break the array
            # conversion below, so skip it.
            continue
        particle.append(np.array(line[0:3], dtype='float32'))
        signature.append(np.array(line[3:4], dtype='float32'))

# Header values: particle count (first line) and periodic box size (second line).
# The count is cast to int because it is later used in range() and as an array
# dimension, both of which reject a numpy float32 scalar.
n_particle = int(particle[0][0])
box_size = particle[1][0]

# Drop the two header rows; the remaining rows are the particles themselves.
particle = np.array(particle[2:], dtype='float32')
signature = np.array(signature[2:], dtype='float32')
signature = np.squeeze(signature)

In [3]:
# Sanity check of the parsed header values and the two arrays.
print(n_particle, box_size)
print('particle:', particle[0:2], particle.shape, particle.dtype)
# Report the dtype of `signature` itself rather than that of `particle`.
print('signature:', signature[0:2], signature.shape, signature.dtype)

1728 21.769487
particle: [[19.3346    8.07928   0.833602]
 [18.9701   15.1646    1.64121 ]] (1728, 3) float32
signature: [2. 1.] (1728,) float32


In [4]:
cutoff = 2.5

# Extend the periodic boundary: build the images of every particle shifted by
# one box length along each axis, and keep those whose coordinates all lie in
# [-cutoff, box_size + cutoff].

# The 26 non-zero lattice shifts, enumerated in (i, j, k) nesting order so the
# resulting images come out in the same order as a triple loop over -1..1.
shifts = np.array(
    [[i, j, k]
     for i in range(-1, 2)
     for j in range(-1, 2)
     for k in range(-1, 2)
     if (i, j, k) != (0, 0, 0)],
    dtype='float32',
)

# images[n, s] = particle[n] + box_size * shifts[s]
images = particle[:, None, :] + box_size * shifts[None, :, :]
inside = ((images >= -cutoff) & (images <= box_size + cutoff)).all(axis=2)

# Boolean indexing flattens (particle, shift) in row-major order and always
# yields a (K, 3) array, even when no image qualifies (K == 0).
particle_period = np.array(images[inside], dtype='float32')

In [5]:
# Candidate neighbours: the original particles followed by their periodic images.
particle_expand = np.concatenate([particle, particle_period])
struct = []

# Loop over the central particles (original particles only, not the images).
# int() guards against a float particle count, which range() would reject.
for i in range(int(n_particle)):

    # Relative coordinates (centre minus candidate) to every candidate at once.
    relate_coord = particle[i] - particle_expand
    dis = np.linalg.norm(relate_coord, axis=1)

    within = dis <= cutoff
    # particle_expand[i] is the central particle itself; never its own neighbour.
    within[i] = False

    # Keep a list of (3,) vectors per particle, in candidate-index order.
    struct.append(list(relate_coord[within]))

In [7]:
# Convert the per-particle neighbour lists into one fixed-size array.
# Each particle keeps at most `max_len` neighbours; unused slots stay zero.
max_len = 12
# Array dimensions and range() need a real int, not a numpy float scalar.
n_rows = int(n_particle)
structure = np.zeros((n_rows, max_len, 3))

for i in range(n_rows):
    kept = struct[i][:max_len]
    # Slice assignment replaces the element-wise copy that relied on catching
    # an exception to stop; an empty neighbour list leaves the row as zeros.
    if len(kept) > 0:
        structure[i, :len(kept)] = kept

In [8]:
pickle_file = 'spsi_1.pkl'

# Persist the arrays of the first file. Failures are reported, not raised.
try:
    save = {
        'structure':structure,
        'signature':signature,
    }
    # The context manager closes the file even if pickle.dump raises.
    with open(pickle_file, 'wb') as f:
        pickle.dump(save, f)
    print('Done')
except Exception as e:
    print('Unable to save data:',e)

Done


In [9]:
pickle_file = 'spsi_1.pkl'

# NOTE: pickle.load can execute arbitrary code from the file; only load
# pickles that were produced locally or come from a trusted source.
# The context manager makes sure the file handle is closed after loading.
with open(pickle_file, 'rb') as f:
    save = pickle.load(f)
structure = np.array(save['structure'], dtype='float32')
signature = np.array(save['signature'], dtype='float32')

print('structure:',structure[0:2], structure.shape, structure.dtype)
print('signature:',signature[0:2], signature.shape, signature.dtype)

structure: [[[-0.77549934  0.7129297  -1.7008381 ]
  [-0.14249992  1.8294797   0.7959696 ]
  [ 1.6442013   0.9305997   0.796118  ]
  [ 1.7859001  -0.14908028 -0.88851804]
  [ 0.8901005   1.6389098  -0.915898  ]
  [-1.8086987   0.85362005  0.005898  ]
  [-1.6301994  -0.95314026 -0.832998  ]
  [ 0.92570114 -1.7728901   0.01099598]
  [ 0.17239952 -1.0484104  -1.6945281 ]
  [-0.8781986  -1.6742401   0.8742896 ]
  [ 0.74300003 -0.72848034  1.7079897 ]
  [-1.0625992   0.13169003  1.7152891 ]]

 [[ 1.0475998  -0.11159992 -1.7008001 ]
  [-0.8368988  -1.6296997   0.87643194]
  [-1.6030998  -0.8785     -0.8114401 ]
  [-1.7821999   0.9281006   0.02900994]
  [-0.17499924  1.8032007   0.847524  ]
  [ 1.7646008  -0.941      -0.02807999]
  [ 0.8696003   1.6920004  -0.85448015]
  [ 1.6048012   0.87210083  0.81503296]
  [-0.78089905  0.7290001  -1.6908602 ]
  [ 0.1630001  -1.8165989  -0.84561   ]
  [-1.0128002   0.17730045  1.7154974 ]
  [ 0.76670074 -0.7372999   1.6940969 ]]] (1728, 12, 3) float32
sig

# 批处理

In [17]:
def preprocess(filepath, cutoff=2.5, max_len=12):
    """Build a fixed-size neighbour array for every particle in one data file.

    The file is whitespace separated. The first value of line 1 is the particle
    count and the first value of line 2 is the periodic box size. Every further
    non-blank line is one particle: fields 0-2 are its coordinates and field 3
    is stored as its signature.

    Args:
        filepath: Path of the text file to read.
        cutoff: Neighbour distance threshold (inclusive). It is also the margin
            by which periodic images may lie outside the box.
        max_len: Maximum number of neighbours kept per particle.

    Returns:
        A tuple ``(structure, signature)``:
        ``structure`` is a float64 array of shape ``(n_particle, max_len, 3)``
        holding ``centre - neighbour`` vectors in candidate order, zero-padded
        when a particle has fewer than ``max_len`` neighbours and truncated when
        it has more. ``signature`` is the squeezed float32 array of field 3.

    Raises:
        OSError: If the file cannot be opened.
        IndexError: If a header line is missing, or the header count exceeds
            the number of particle rows.
        ValueError: If a field cannot be parsed as a number.
    """
    # Read all non-blank rows; a blank line would otherwise produce a ragged row.
    rows = []
    with open(filepath) as file:
        for raw in file:
            fields = raw.split()
            if fields:
                rows.append(fields)

    # Header: particle count and periodic box size.
    header = np.array([rows[0][0], rows[1][0]], dtype='float32')
    # range() and array shapes need a real int, not a numpy float scalar.
    n_particle = int(header[0])
    box_size = header[1]

    body = rows[2:]
    particle = np.array([r[0:3] for r in body], dtype='float32')
    signature = np.squeeze(np.array([r[3:4] for r in body], dtype='float32'))

    # Extend the periodic boundary: the 26 non-zero lattice shifts, enumerated
    # in (i, j, k) nesting order so images keep a deterministic order.
    shifts = np.array(
        [[i, j, k]
         for i in range(-1, 2)
         for j in range(-1, 2)
         for k in range(-1, 2)
         if (i, j, k) != (0, 0, 0)],
        dtype='float32',
    )
    # images[n, s] = particle[n] + box_size * shifts[s]
    images = particle[:, None, :] + box_size * shifts[None, :, :]
    # Keep images whose coordinates all lie in [-cutoff, box_size + cutoff].
    inside = ((images >= -cutoff) & (images <= box_size + cutoff)).all(axis=2)
    # Boolean indexing always yields (K, 3), so concatenation works for K == 0.
    particle_period = images[inside]

    particle_expand = np.concatenate([particle, particle_period])

    # Convert to a fixed-size array; unused neighbour slots stay zero.
    structure = np.zeros((n_particle, max_len, 3))

    # Loop over the central particles (original particles only).
    for i in range(n_particle):
        # Relative coordinates (centre minus candidate) to every candidate.
        relate_coord = particle[i] - particle_expand
        dis = np.linalg.norm(relate_coord, axis=1)

        within = dis <= cutoff
        # particle_expand[i] is the central particle itself.
        within[i] = False

        kept = relate_coord[within][:max_len]
        structure[i, :len(kept)] = kept

    return structure, signature

In [19]:
# Run every data file (1 through 10) through preprocess and stack the results.
# Rebuilding from all files, instead of appending to the existing `structure`
# and `signature`, keeps this cell idempotent: re-running it does not append
# the same data a second time.
structure_parts = []
signature_parts = []
for i in range(1,11):
    filepath = os.path.join('data','spsi_cutoff_25_'+str(i)+'.txt')
    structure_next, signature_next = preprocess(filepath)
    structure_parts.append(structure_next)
    signature_parts.append(signature_next)

# Concatenate once at the end rather than re-copying the arrays per file.
structure = np.concatenate(structure_parts)
signature = np.concatenate(signature_parts)

print('structure:',structure[0:2], structure.shape, structure.dtype)
print('signature:',signature[0:2], signature.shape, signature.dtype)

structure: [[[-0.77549934  0.71292973 -1.70083809]
  [-0.14249992  1.82947969  0.79596961]
  [ 1.64420128  0.93059969  0.79611802]
  [ 1.78590012 -0.14908028 -0.88851804]
  [ 0.89010048  1.63890982 -0.91589803]
  [-1.80869865  0.85362005  0.005898  ]
  [-1.63019943 -0.95314026 -0.83299798]
  [ 0.92570114 -1.77289009  0.01099598]
  [ 0.17239952 -1.04841042 -1.6945281 ]
  [-0.87819862 -1.67424011  0.87428957]
  [ 0.74300003 -0.72848034  1.70798969]
  [-1.06259918  0.13169003  1.71528912]]

 [[ 1.04759979 -0.11159992 -1.70080006]
  [-0.8368988  -1.62969971  0.87643194]
  [-1.60309982 -0.87849998 -0.81144011]
  [-1.78219986  0.92810059  0.02900994]
  [-0.17499924  1.80320072  0.84752399]
  [ 1.76460075 -0.94099998 -0.02807999]
  [ 0.8696003   1.69200039 -0.85448015]
  [ 1.60480118  0.87210083  0.81503296]
  [-0.78089905  0.72900009 -1.69086015]
  [ 0.16300011 -1.81659889 -0.84561002]
  [-1.01280022  0.17730045  1.71549737]
  [ 0.76670074 -0.73729992  1.69409692]]] (17280, 12, 3) float64
si

In [20]:
pickle_file = 'spsi_dataset.pkl'

# Persist the combined dataset. Failures are reported, not raised.
try:
    save = {
        'structure':structure,
        'signature':signature,
    }
    # The context manager closes the file even if pickle.dump raises.
    with open(pickle_file, 'wb') as f:
        pickle.dump(save, f)
    print('Done')
except Exception as e:
    print('Unable to save data:',e)

Done
