# Data Preparation

This notebook gives an example of how to prepare your own data to train the model. 

In [1]:
from pathlib import Path
from typing import Union

import numpy as np
import pandas as pd
from pymatgen.core import Structure

In [2]:
def get_structures():
    """Build the example Si structure used throughout this notebook.

    Returns:
        A pymatgen ``Structure`` with two Si sites, at fractional coordinates
        (0, 0, 0) and (0.25, 0.25, 0.25), in a cell whose three lattice
        vectors each have one zero component and two components of 2.73.
    """
    # The only non-zero entry that appears in the lattice matrix.
    d = 2.73

    # One lattice vector per row; the zero sits on the diagonal.
    lattice_vectors = np.array(
        [
            [0, d, d],
            [d, 0, d],
            [d, d, 0],
        ]
    )
    elements = ["Si", "Si"]
    frac_coords = [[0, 0, 0], [0.25, 0.25, 0.25]]

    return Structure(lattice=lattice_vectors, species=elements, coords=frac_coords)


def get_tensor(seed: int = 35) -> np.ndarray:
    """Generate a random 3x3x3x3 array standing in for an elastic tensor.

    Note, this is by no means a physical tensor that satisfies the symmetry of any
    crystal. It is just a random array to show the data preparation process.

    Args:
        seed: seed for the random number generator; the same seed always
            produces the same array.

    Returns:
        Array of shape (3, 3, 3, 3) with values drawn uniformly from [0, 1).
    """
    # Use a private generator instead of `np.random.seed(seed)`: it yields the
    # exact same values, but does not reseed NumPy's global RNG, so calling this
    # function no longer changes random draws made elsewhere in the notebook.
    rng = np.random.RandomState(seed)

    return rng.rand(3, 3, 3, 3)

## Get data 

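Here we simply build two lists of length 10: one repeating the same Si structure and one repeating the same elastic tensor. 
You should replace these with your own data, keeping one tensor per structure and the two lists in the same order.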

In [3]:
# Build the single example structure and the single example tensor.
Si = get_structures()
t = get_tensor()

# Repeat each one 10 times; every list entry refers to the same object.
structures = [Si] * 10
tensors = [t] * 10

## Write data to file

In [4]:
def write_data(
    structures: list[Structure],
    tensors: list[np.ndarray],
    path: Union[str, Path] = "elasticity_tensors.json",
):
    """Write structures and tensors to a JSON file.

    The data is stored as a pandas DataFrame with one row per
    (structure, tensor) pair and two columns, ``structure`` and
    ``elastic_tensor_full``, serialized with ``DataFrame.to_json``.

    Args:
        structures: list of pymatgen structures.
        tensors: list of 3x3x3x3 elastic tensors, given as numpy arrays or as
            anything ``np.asarray`` can convert (e.g. nested lists). Must have
            the same length as ``structures``; the i-th tensor belongs to the
            i-th structure.
        path: path to write the data, as a string or a ``Path``.

    Raises:
        ValueError: if ``structures`` and ``tensors`` differ in length.
    """
    # Materialize first so that any iterable is accepted and can be measured.
    structures = list(structures)
    tensors = list(tensors)

    # Fail early with a clear message rather than an opaque pandas error.
    if len(structures) != len(tensors):
        raise ValueError(
            f"Expected one tensor per structure, got {len(structures)} "
            f"structures and {len(tensors)} tensors."
        )

    data = {
        "structure": [s.as_dict() for s in structures],
        # np.asarray makes nested lists work as well as numpy arrays.
        "elastic_tensor_full": [np.asarray(t).tolist() for t in tensors],
    }
    df = pd.DataFrame(data)

    df.to_json(path)

In [5]:
# Save the example data to the default output file, elasticity_tensors.json.
write_data(structures=structures, tensors=tensors)