### Import python modules 

In [3]:
# 导入模块
import os
import math
import random

import numpy as np

import torch

root_path = os.path.abspath('.')

### Raw data processing

In [5]:
num_sample,num_inline,num_crossline = 128,256,256
data_name = f"0108-{num_sample}x{num_inline}x{num_crossline}"
dataset_name = data_name

data_path = os.path.join(os.path.abspath('..'), "DATA", data_name)
dataset_path = os.path.join(root_path, "datasets", dataset_name)
print(f"raw data path: {data_path}")
print(f"dataset path: {dataset_path}")

seis_path = os.path.join(data_path, "seis")
rgt_path = os.path.join(data_path, "rgt")
fault_path = os.path.join(data_path, "fault")

if not os.path.exists(dataset_path):
    os.makedirs(dataset_path)
    
train_sample_path = os.path.join(dataset_path, "train")
if not os.path.exists(train_sample_path):
    os.makedirs(train_sample_path)
    
pic_save_path = os.path.join(dataset_path, "pics")
if not os.path.exists(pic_save_path):
    os.makedirs(pic_save_path)

raw data path: /home/zfbi/Research/DeepISMNet-master/DATA/0108-128x256x256
dataset path: /home/zfbi/Research/DeepISMNet-master/implicitStructuralModeling2d/datasets/0108-128x256x256


### Generating training and validation datasets

In [8]:
file_list = os.listdir(rgt_path)

file_name_list = [int(i.split(".")[0]) for i in file_list]
file_name_list = sorted(enumerate(file_name_list), key=lambda x:x[1]) 
file_list = [file_list[i] for i in [j[0] for j in file_name_list]]

num_file = len(file_list)
num_profile_inline = 4
num_profile_crossline = 4
edge_mute = 10

In [9]:
def get_section_data(file_list, num_file, num_profile_inline, num_profile_crossline, edge_mute, sample_path):
    
    sample_list = []
    sample_count = 0
    for i in range(num_file):
        print(f"Processing raw data file {i+1}/{num_file} ...")

        file_name = file_list[i]

        seis_file_path = os.path.join(seis_path,file_name)
        seis_cube = np.fromfile(seis_file_path, dtype=np.single)
        seis_cube = np.reshape(seis_cube, (num_crossline, num_inline, num_sample))
        
        rgt_file_path = os.path.join(rgt_path,file_name)
        rgt_cube = np.fromfile(rgt_file_path, dtype=np.single)
        rgt_cube = np.reshape(rgt_cube, (num_crossline, num_inline, num_sample))
        
        fault_file_path = os.path.join(fault_path,file_name)
        fault_cube = np.fromfile(fault_file_path, dtype=np.single)  
        fault_cube = np.reshape(fault_cube, (num_crossline, num_inline, num_sample))

        # crossline方向抽取剖面
        profile_locations = np.linspace(num_crossline//edge_mute, num_crossline//edge_mute*(edge_mute-1), 
                                        num_profile_crossline, endpoint=False, dtype=int)
        for profile_location in profile_locations:

            sample = {}
            sample["seis"] = seis_cube[profile_location, :, :].T
            sample["rgt"] = rgt_cube[profile_location, :, :].T
            sample["fault"] = fault_cube[profile_location, :, :].T

            sample_file = f'sample_{sample_count}.npy'
            np.save(os.path.join(sample_path, sample_file), sample)  
            sample_list.append(sample_file)
            sample_count += 1

        # inline方向抽取剖面
        profile_locations = np.linspace(num_inline//edge_mute, num_inline//edge_mute*(edge_mute-1), 
                                        num_profile_inline, endpoint=False, dtype=int)
        for profile_location in profile_locations:

            sample = {}
            sample["seis"] = seis_cube[:, profile_location, :].T
            sample["rgt"] = rgt_cube[:, profile_location, :].T
            sample["fault"] = fault_cube[:, profile_location, :].T
        
            sample_file = f'sample_{sample_count}.npy'
            np.save(os.path.join(sample_path, sample_file), sample)  
            sample_list.append(sample_file)
            sample_count += 1   
            
    return sample_list

In [10]:
train_sample_list = get_section_data(file_list, num_file, num_profile_inline, num_profile_crossline, edge_mute, train_sample_path) 

Processing raw data file 1/600 ...
Processing raw data file 2/600 ...
Processing raw data file 3/600 ...
Processing raw data file 4/600 ...
Processing raw data file 5/600 ...
Processing raw data file 6/600 ...
Processing raw data file 7/600 ...
Processing raw data file 8/600 ...
Processing raw data file 9/600 ...
Processing raw data file 10/600 ...
Processing raw data file 11/600 ...
Processing raw data file 12/600 ...
Processing raw data file 13/600 ...
Processing raw data file 14/600 ...
Processing raw data file 15/600 ...
Processing raw data file 16/600 ...
Processing raw data file 17/600 ...
Processing raw data file 18/600 ...
Processing raw data file 19/600 ...
Processing raw data file 20/600 ...
Processing raw data file 21/600 ...
Processing raw data file 22/600 ...
Processing raw data file 23/600 ...
Processing raw data file 24/600 ...
Processing raw data file 25/600 ...
Processing raw data file 26/600 ...
Processing raw data file 27/600 ...
Processing raw data file 28/600 ...
P

Processing raw data file 226/600 ...
Processing raw data file 227/600 ...
Processing raw data file 228/600 ...
Processing raw data file 229/600 ...
Processing raw data file 230/600 ...
Processing raw data file 231/600 ...
Processing raw data file 232/600 ...
Processing raw data file 233/600 ...
Processing raw data file 234/600 ...
Processing raw data file 235/600 ...
Processing raw data file 236/600 ...
Processing raw data file 237/600 ...
Processing raw data file 238/600 ...
Processing raw data file 239/600 ...
Processing raw data file 240/600 ...
Processing raw data file 241/600 ...
Processing raw data file 242/600 ...
Processing raw data file 243/600 ...
Processing raw data file 244/600 ...
Processing raw data file 245/600 ...
Processing raw data file 246/600 ...
Processing raw data file 247/600 ...
Processing raw data file 248/600 ...
Processing raw data file 249/600 ...
Processing raw data file 250/600 ...
Processing raw data file 251/600 ...
Processing raw data file 252/600 ...
P

Processing raw data file 448/600 ...
Processing raw data file 449/600 ...
Processing raw data file 450/600 ...
Processing raw data file 451/600 ...
Processing raw data file 452/600 ...
Processing raw data file 453/600 ...
Processing raw data file 454/600 ...
Processing raw data file 455/600 ...
Processing raw data file 456/600 ...
Processing raw data file 457/600 ...
Processing raw data file 458/600 ...
Processing raw data file 459/600 ...
Processing raw data file 460/600 ...
Processing raw data file 461/600 ...
Processing raw data file 462/600 ...
Processing raw data file 463/600 ...
Processing raw data file 464/600 ...
Processing raw data file 465/600 ...
Processing raw data file 466/600 ...
Processing raw data file 467/600 ...
Processing raw data file 468/600 ...
Processing raw data file 469/600 ...
Processing raw data file 470/600 ...
Processing raw data file 471/600 ...
Processing raw data file 472/600 ...
Processing raw data file 473/600 ...
Processing raw data file 474/600 ...
P

### 划分训练集和测试集

In [12]:
#训练集比例
train_set_ratio = 0.9

num_train_sample = len(train_sample_list)
print(f"样本库总数: {num_train_sample}")

# 混乱数据集
random.shuffle(train_sample_list)

# 训练集/验证集划分
valid_num = int(num_train_sample * (1-train_set_ratio))
valid_sample_list = random.sample(train_sample_list, valid_num)

samples_train,samples_valid = [],[]

for i_sample in train_sample_list[:num_train_sample]:
    if i_sample not in valid_sample_list:
        samples_train.append(i_sample)
    else:
        samples_valid.append(i_sample)

print(f'训练样本数量：{len(samples_train)}')
print(f'验证样本数量：{len(samples_valid)}')
np.save(os.path.join(dataset_path, 'samples_train.npy'), samples_train)
np.save(os.path.join(dataset_path, 'samples_valid.npy'), samples_valid)

样本库总数: 4800
训练样本数量：4321
验证样本数量：479
