In [1]:
import os
from ase.io import read, write
import dpdata as dp
import numpy as np

In [2]:
# Paths
input_dir = "outcars/"  # Directory containing the VASP OUTCAR files to convert
output_dir = "test1/"  # Directory that receives the converted .traj files

# Create the output directory. exist_ok=True makes this a no-op when the
# directory is already there, so the cell can be re-run safely.
os.makedirs(output_dir, exist_ok=True)

# Convert every file whose name starts with "OUTCAR" into an ASE trajectory.
# os.listdir returns entries in arbitrary order; sorting keeps the processing
# order (and therefore the log below) reproducible between runs.
for file_name in sorted(os.listdir(input_dir)):
    if not file_name.startswith("OUTCAR"):
        continue  # Not an OUTCAR file; leave it alone

    input_path = os.path.join(input_dir, file_name)
    output_path = os.path.join(output_dir, file_name + ".traj")
    print(input_path, output_path)
    try:
        # index=":" asks ASE for all frames, so `atoms` holds every image
        # read from the OUTCAR rather than a single structure.
        atoms = read(input_path, index=":", format="vasp-out")
        write(output_path, atoms, format="traj")
        print(f"Converted {file_name} to {output_path}")
    except Exception as e:
        # Best effort: report the failure and carry on with the remaining files.
        print(f"Failed to convert {file_name}: {e}")

outcars/OUTCAR111t500K1 test1/OUTCAR111t500K1.traj
Converted OUTCAR111t500K1 to test1/OUTCAR111t500K1.traj


In [5]:
# Load the converted trajectories in ./test1 whose names match OUTCAR* into a
# single dpdata MultiSystems object, reading them with the ASE trajectory format.
dpset = dp.MultiSystems.from_dir(
    dir_name="./test1",
    file_name="OUTCAR*",
    fmt="ase/traj",
)

In [6]:
# Show the summary of the loaded dataset (number of systems and frames).
dpset

MultiSystems (1 systems containing 1224 frames)

In [7]:
# Export the complete (unsplit) dataset to the "deepmd" folder using dpdata's
# deepmd/npy writer. The call is the cell's last expression, so the value it
# returns is echoed as the cell output.
dpset.to_deepmd_npy("deepmd")

MultiSystems (1 systems containing 1224 frames)

In [8]:
# Display the dataset summary again after the export; the system and frame
# counts should be unchanged.
dpset

MultiSystems (1 systems containing 1224 frames)

In [9]:
# Seed NumPy's global RNG so the shuffles below, and hence the split, are
# the same every time this cell is executed.
np.random.seed(42)

# Start from empty containers so re-running the cell does not accumulate frames.
train_systems = dp.MultiSystems()
val_systems = dp.MultiSystems()

# Split each system independently so every system contributes to both subsets.
for system in dpset:
    num_frames = len(system)

    # Frame indices 0 .. num_frames-1 in random order
    indices = np.arange(num_frames)
    np.random.shuffle(indices)

    # The first 80% of the shuffled indices (rounded down) go to training,
    # everything after the cut goes to validation.
    split_idx = int(num_frames * 0.8)
    train_indices, val_indices = np.split(indices, [split_idx])

    train_systems.append(system.sub_system(train_indices))
    val_systems.append(system.sub_system(val_indices))

In [10]:
# Show the summary of the training subset (number of systems and frames).
train_systems

MultiSystems (1 systems containing 979 frames)

In [11]:
# Show the summary of the validation subset (number of systems and frames).
val_systems

MultiSystems (1 systems containing 245 frames)

In [12]:
# Write the two subsets to their own folders with dpdata's deepmd/npy writer:
# training frames to "train_data", validation frames to "val_data".
train_systems.to_deepmd_npy("train_data")
# Last expression of the cell, so its return value is echoed as the cell output.
val_systems.to_deepmd_npy("val_data")

MultiSystems (1 systems containing 245 frames)