In [None]:
import os
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
os.environ["XLA_PYTHON_CLIENT_PREALLOCATE"]="false"

import pathlib
import glob
from copy import deepcopy

import tqdm
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

import jax
import jax.numpy as jnp
import equinox as eqx

In [None]:
from mc2.utils.data_inspection import (
    get_available_material_names,
    get_file_overview,
    load_and_process_single_from_full_file_overview
)
from mc2.data_management import FrequencySet, MaterialSet, DataSet

## load data from pickle:

In [None]:
# Load the preprocessed data set from its pickle file.
processed_data_dir = pathlib.Path("../../data/processed")
dataset = DataSet.load_from_file(processed_data_dir / "ten_mat_data.pickle")

# N49 is removed from the data set for now, since its data is incomplete:
#   - 50 kHz and 80 kHz are missing
#   - 320 kHz has no data at 25 degrees

# Work on a copy so the name list held by the data set itself is not mutated.
available_materials = deepcopy(dataset.material_names)
print(available_materials, len(available_materials), sep="\n")

available_materials.remove("N49")
print(available_materials, len(available_materials), sep="\n")


# Keep only the remaining materials and check that the filter did what was asked.
dataset = dataset.filter_materials(available_materials)
assert dataset.material_names == available_materials

# Structure of the data set:

- `DataSet`: holds a list of `MaterialSet` which can be accessed either through the index or via `at_material(material_name: str)`
- `MaterialSet`: holds a list of `FrequencySet` objects which can be accessed either through the index or via `at_frequency(frequency: float)`
- `FrequencySet`: holds the raw data as arrays with shape `(n_sequences, sequence_length)` for `B` and `H` and `(n_sequences)` for `T`

In [None]:
# Index-based and name-based access on the DataSet must return equal MaterialSets.
assert dataset[0] == dataset.at_material("78")
assert dataset[1] == dataset.at_material("3C90")
assert isinstance(dataset.at_material("78"), MaterialSet)

# Index-based and frequency-based access on a MaterialSet must return equal FrequencySets.
material_set_for_78 = dataset[0]
first_frequency = material_set_for_78.frequencies[0]
assert material_set_for_78[0] == material_set_for_78.at_frequency(50_000)
assert material_set_for_78[0] == material_set_for_78.at_frequency(first_frequency)
assert isinstance(material_set_for_78[0], FrequencySet)

# Both access styles can be chained down to a single FrequencySet.
frequency_set_for_78_at_50kHz = material_set_for_78[0]
assert frequency_set_for_78_at_50kHz == dataset.at_material("78").at_frequency(50_000)

Filtering examples (three main functions):

- `{DataSet}.filter_materials(list[str] | str) -> {DataSet}`
- `{DataSet/MaterialSet}.filter_frequencies(list[float] | jnp.Array | float) -> {DataSet/MaterialSet}`
- `{DataSet/MaterialSet/FrequencySet}.filter_temperatures(list[float] | jnp.Array | float) -> {DataSet/MaterialSet/FrequencySet}` 

In [None]:
# Filtering by frequency must leave exactly the requested frequencies in every MaterialSet.
# `jnp.array_equal` reduces the comparison to a single boolean and also compares the
# shapes, so a mismatch fails the assertion instead of raising an "ambiguous truth
# value" error or slipping through via broadcasting.
dataset_at_80kHz = dataset.filter_frequencies([80_000])
for m_set in dataset_at_80kHz:
    assert jnp.array_equal(m_set.frequencies, jnp.array([80_000]))

dataset_at_50_and_80kHz = dataset.filter_frequencies([50_000, 80_000])
for m_set in dataset_at_50_and_80kHz:
    assert jnp.array_equal(m_set.frequencies, jnp.array([50_000, 80_000]))

In [None]:
# Filtering by temperature must leave only sequences recorded at that temperature.
# `jnp.array_equal` yields one boolean even when several distinct temperatures are
# left over, so a failed filter shows up as an AssertionError rather than as an
# "ambiguous truth value" error.
dataset_at_25degrees = dataset.filter_temperatures([25])
for m_set in dataset_at_25degrees:
    for f_set in m_set:
        assert jnp.array_equal(jnp.unique(f_set.T), jnp.array([25]))

# Exploratory Data Analysis (EDA):

### TODOS:
- build an exploratory data analysis
  - extend with further plots
  - generalize for all materials through subplots 
- use the one from MC1 as reference: https://github.com/upb-lea/hardcore-magnet-challenge/blob/main/notebooks/wk-1.1-eda.ipynb


### Questions:
- Is the end of a given sequence the starting point for the next? Officially no
- ...

## Broad overview

In [None]:
# Overall number of datapoints: roughly 850M
# (sum of the H sample counts over every frequency set of every material)
number = sum(
    frequency_set.H.size
    for material_set in dataset
    for frequency_set in material_set
)

print(number)

### Datapoints per material:

In [None]:
# Number of H samples per material, keyed by material name.
datapoints_per_material = dict.fromkeys(dataset.material_names, 0)
for material_set in dataset:
    for frequency_set in material_set:
        # H and B of a frequency set must have identical shapes.
        assert frequency_set.H.shape == frequency_set.B.shape
        datapoints_per_material[frequency_set.material_name] += frequency_set.H.size

material_labels = list(datapoints_per_material.keys())
material_sizes = list(datapoints_per_material.values())

fig, ax = plt.subplots(figsize=(4, 4), dpi=150)
ax.pie(material_sizes, labels=material_labels)
ax.set_title("Size portions per material");

### Datapoints per frequency:

In [None]:
# Number of H samples per excitation frequency. The keys are frequencies in Hz,
# initialised from the frequencies of the first material.
datapoints_per_frequency = {frequency: 0 for frequency in dataset[0].frequencies.tolist()}
for material_set in dataset:
    for frequency_set in material_set:
        # Convert to a plain float so the key is hashable whatever scalar type the
        # frequency is stored as, and tolerate frequencies absent from the first material.
        frequency = float(frequency_set.frequency)
        datapoints_per_frequency[frequency] = (
            datapoints_per_frequency.get(frequency, 0) + frequency_set.H.size
        )

# The title announces kHz, so convert the Hz keys before using them as labels.
frequency_labels_in_kHz = [f"{frequency / 1e3:g}" for frequency in datapoints_per_frequency]

fig, ax = plt.subplots(figsize=(4, 4), dpi=150)
ax.pie(list(datapoints_per_frequency.values()), labels=frequency_labels_in_kHz)
ax.set_title("Size portions per frequency in kHz");

In [None]:
fig, axs = plt.subplots(2, 5, figsize=(20, 6), sharey=True, sharex=True)
fig.suptitle("Datapoints per frequency per material")

for idx, material_set in enumerate(dataset):
    # The first five materials go into the top row, the remaining ones into the bottom row.
    panel = axs[int(idx >= 5), idx % 5]

    # Samples per frequency set = number of sequences x sequence length.
    full_set_lengths = [
        n_sequences * sequence_length
        for n_sequences, sequence_length in (freq_set.B.shape for freq_set in material_set)
    ]

    panel.plot(material_set.frequencies / 1e3, np.array(full_set_lengths) / 1e6)
    panel.set_title(material_set.material_name)

# Axis labels only on the outer panels: x on the bottom row, y on the left column.
for ax in axs[-1]:
    ax.set_xlabel("f in kHz")

for ax in axs[:, 0]:
    ax.set_ylabel("# of datapoints in M")

for ax in axs.flat:
    ax.grid()

fig.tight_layout()
plt.show()

### Datapoints per temperature:

In [None]:
# Number of H samples per temperature, summed over all materials and frequencies.
# NOTE: only the temperatures that occur in the first frequency set of the first
# material are used as keys; other temperatures are not counted.
datapoints_per_temperature = {temperature: 0 for temperature in jnp.unique(dataset[0][0].T).tolist()}
for material_set in dataset:
    for frequency_set in material_set:
        for temperature in datapoints_per_temperature:
            # Rows (sequences) of H that were recorded at this temperature.
            sequence_indices = jnp.where(frequency_set.T == temperature)[0]
            # `.size` is already a plain int, no further reduction is needed.
            datapoints_per_temperature[temperature] += frequency_set.H[sequence_indices].size

fig, ax = plt.subplots(figsize=(4, 4), dpi=150)
ax.pie(list(datapoints_per_temperature.values()), labels=list(datapoints_per_temperature.keys()))
ax.set_title("Size portions per Temperature in °C");

In [None]:
# Temperatures of a single material. The material is selected explicitly (the last
# one in the data set) so that this cell does not depend on a loop variable left
# over from a previously executed cell.
material_set = list(dataset)[-1]

print("unique temperatures per frequency")
unique_temperatures = {
    int(frequency): jnp.unique(freq_set.T)
    for frequency, freq_set in zip(material_set.frequencies, material_set)
}
display(unique_temperatures)

# One line per frequency set: temperature of every sequence over the sequence index.
fig, ax = plt.subplots()
fig.suptitle("Temperatures per sequence")
for freq_set in material_set:
    ax.plot(freq_set.T, label=str(int(freq_set.frequency / 1e3)) + " kHz")

ax.legend()
ax.set_xlabel("sequence index")
ax.set_ylabel("T in °C")
ax.grid()
plt.show()

In [None]:
# Datapoints per temperature, generalized to all materials (one panel per material).
fig, axs = plt.subplots(2, 5, figsize=(20, 6), sharey=True, sharex=True)
fig.suptitle("Datapoints per temperature per material")

for idx, material_set in enumerate(dataset):
    row_idx = 0 if idx < 5 else 1
    col_idx = idx % 5

    # Accumulate the number of samples per temperature over all frequency sets.
    # Assumes T holds one temperature per sequence, i.e. T has shape (n_sequences,)
    # and B has shape (n_sequences, sequence_length).
    n_data_points_by_temperature = {}
    for freq_set in material_set:
        n_sequences, sequence_length = freq_set.B.shape
        set_temperatures, sequences_per_temperature = np.unique(
            np.asarray(freq_set.T), return_counts=True
        )
        for temperature, n_seq in zip(set_temperatures.tolist(), sequences_per_temperature.tolist()):
            n_data_points_by_temperature[temperature] = (
                n_data_points_by_temperature.get(temperature, 0) + n_seq * sequence_length
            )

    temperatures = sorted(n_data_points_by_temperature)
    n_data_points = [n_data_points_by_temperature[temperature] / 1e6 for temperature in temperatures]

    axs[row_idx, col_idx].plot(temperatures, n_data_points, marker="o")
    axs[row_idx, col_idx].set_title(material_set.material_name)

for ax in axs[-1]:
    ax.set_xlabel("T in °C")

for ax in axs[:, 0]:
    ax.set_ylabel("# of datapoints in M")

for ax in axs.flat:
    ax.grid()

fig.tight_layout()
plt.show()

### H Histogram per material:

In [None]:
fig, axs = plt.subplots(2, 5, figsize=(20, 6))
fig.suptitle("H histogram per material")

for idx, material_set in enumerate(dataset):
    # The first five materials go into the top row, the remaining ones into the bottom row.
    panel = axs[int(idx >= 5), idx % 5]

    # Pool the H samples of all frequency sets of this material into one flat array.
    H_values = jnp.concatenate([jnp.ravel(freq_set.H) for freq_set in material_set])
    panel.hist(H_values, bins=50)
    panel.set_title(material_set.material_name)

# Axis labels only on the outer panels: x on the bottom row, y on the left column.
for ax in axs[-1]:
    ax.set_xlabel("H in A/m")

for ax in axs[:, 0]:
    ax.set_ylabel("# of datapoints")

for ax in axs.flat:
    ax.grid()

fig.tight_layout()
plt.show()

The distribution is fairly poor with respect to high-valued $H(t)$: large values of $|H(t)|$ are only sparsely covered. How should one deal with this? Models will likely struggle in these value ranges.

### B Histogram per material:

In [None]:
fig, axs = plt.subplots(2, 5, figsize=(20, 6))
fig.suptitle("B histogram per material")

for idx, material_set in enumerate(dataset):
    # The first five materials go into the top row, the remaining ones into the bottom row.
    panel = axs[int(idx >= 5), idx % 5]

    # Pool the B samples of all frequency sets of this material into one flat array.
    B_values = jnp.concatenate([jnp.ravel(freq_set.B) for freq_set in material_set])
    panel.hist(B_values, bins=50)
    panel.set_title(material_set.material_name)

# Axis labels only on the outer panels: x on the bottom row, y on the left column.
for ax in axs[-1]:
    ax.set_xlabel("B in Vs/m^2")

for ax in axs[:, 0]:
    ax.set_ylabel("# of datapoints")

for ax in axs.flat:
    ax.grid()

fig.tight_layout()
plt.show()

## Main Loop Hysteresis plots:

### Single example:

In [None]:
# BH curves: first sequence of the first frequency set of every material.

fig, axs = plt.subplots(2, 5, figsize=(20, 6))
fig.suptitle("Exemplary hysteresis per material for 50 kHz")

for idx, material_set in tqdm.tqdm(enumerate(dataset)):
    row_idx = 0 if idx < 5 else 1
    col_idx = idx % 5

    B_values = material_set[0].B[0, :]
    H_values = material_set[0].H[0, :]
    axs[row_idx, col_idx].plot(H_values, B_values)
    axs[row_idx, col_idx].set_title(material_set.material_name)

# H is plotted on the x-axis and B on the y-axis. Label only the outer panels:
# x labels on the bottom row, y labels on the left column.
for ax in axs[-1]:
    ax.set_xlabel("H in A/m")

for ax in axs[:, 0]:
    ax.set_ylabel("B in Vs/m^2")

for ax in axs.flat:
    ax.grid()

fig.tight_layout()
plt.show()

In [None]:
# HB curves: sequence 125 of the first frequency set of every material.

fig, axs = plt.subplots(2, 5, figsize=(20, 6))
fig.suptitle("Exemplary hysteresis per material for 50 kHz")

for idx, material_set in tqdm.tqdm(enumerate(dataset)):
    row_idx = 0 if idx < 5 else 1
    col_idx = idx % 5

    B_values = material_set[0].B[125, :]
    H_values = material_set[0].H[125, :]
    axs[row_idx, col_idx].plot(B_values, H_values)
    axs[row_idx, col_idx].set_title(material_set.material_name)

# B is plotted on the x-axis and H on the y-axis. Label only the outer panels:
# x labels on the bottom row, y labels on the left column.
for ax in axs[-1]:
    ax.set_xlabel("B in Vs/m^2")

for ax in axs[:, 0]:
    ax.set_ylabel("H in A/m")

for ax in axs.flat:
    ax.grid()

fig.tight_layout()
plt.show()

### All over each other:

In [None]:
# HB curves: every sequence of every frequency set, overlaid per material.

fig, axs = plt.subplots(2, 5, figsize=(20, 6))
fig.suptitle("Hysteresis per material")

for idx, material_set in tqdm.tqdm(enumerate(dataset)):
    row_idx = 0 if idx < 5 else 1
    col_idx = idx % 5

    for frequency_set in material_set:
        for sequence_idx in range(frequency_set.B.shape[0]):
            B_values = frequency_set.B[sequence_idx, :]
            H_values = frequency_set.H[sequence_idx, :]
            axs[row_idx, col_idx].plot(B_values, H_values, alpha=.7)

    axs[row_idx, col_idx].set_title(material_set.material_name)

# B is plotted on the x-axis and H on the y-axis. Label only the outer panels:
# x labels on the bottom row, y labels on the left column.
for ax in axs[-1]:
    ax.set_xlabel("B in Vs/m^2")

for ax in axs[:, 0]:
    ax.set_ylabel("H in A/m")

for ax in axs.flat:
    ax.grid()

fig.tight_layout()
plt.show()

### All over each other, one frequency at a time:

In [None]:
# HB curves: one figure per frequency, all sequences of that frequency overlaid per material.

for frequency in tqdm.tqdm(dataset[0].frequencies):
    fig, axs = plt.subplots(2, 5, figsize=(20, 6))
    fig.suptitle("Hysteresis per material")

    for idx, material_set in enumerate(dataset):
        row_idx = 0 if idx < 5 else 1
        col_idx = idx % 5

        frequency_set = material_set.at_frequency(frequency)
        for sequence_idx in range(frequency_set.B.shape[0]):
            B_values = frequency_set.B[sequence_idx, :]
            H_values = frequency_set.H[sequence_idx, :]
            axs[row_idx, col_idx].plot(B_values, H_values, alpha=.7)

        axs[row_idx, col_idx].set_title(material_set.material_name + " at " + str(frequency_set.frequency / 1e3) + " kHz")

    # B is plotted on the x-axis and H on the y-axis. Label only the outer panels:
    # x labels on the bottom row, y labels on the left column.
    for ax in axs[-1]:
        ax.set_xlabel("B in Vs/m^2")

    for ax in axs[:, 0]:
        ax.set_ylabel("H in A/m")

    for ax in axs.flat:
        ax.grid()

    fig.tight_layout()
    plt.show()

### Split by temperatures:

In [None]:
# HB curves: one figure per (temperature, frequency) pair, all matching sequences
# overlaid per material.

for temperature in [25, 50, 70]:
    # The temperature filter does not depend on the frequency, so apply it once
    # per temperature instead of once per figure.
    dataset_at_temperature = dataset.filter_temperatures(temperature)

    for frequency in tqdm.tqdm(dataset[0].frequencies):
        fig, axs = plt.subplots(2, 5, figsize=(20, 6))
        fig.suptitle(f"Hysteresis per material at {temperature} °C")

        for idx, material_set in enumerate(dataset_at_temperature):
            row_idx = 0 if idx < 5 else 1
            col_idx = idx % 5

            frequency_set = material_set.at_frequency(frequency)
            for sequence_idx in range(frequency_set.B.shape[0]):
                B_values = frequency_set.B[sequence_idx, :]
                H_values = frequency_set.H[sequence_idx, :]
                axs[row_idx, col_idx].plot(B_values, H_values, alpha=.7)

            axs[row_idx, col_idx].set_title(material_set.material_name + " at " + str(frequency_set.frequency / 1e3) + " kHz")

        # B is plotted on the x-axis and H on the y-axis. Label only the outer panels:
        # x labels on the bottom row, y labels on the left column.
        for ax in axs[-1]:
            ax.set_xlabel("B in Vs/m^2")

        for ax in axs[:, 0]:
            ax.set_ylabel("H in A/m")

        for ax in axs.flat:
            ax.grid()

        fig.tight_layout()
        plt.show()

### Normalized by maximum sequence values:

In [None]:
# HB curves, each sequence scaled by its own maximum absolute B and H value.

for frequency in tqdm.tqdm(dataset[0].frequencies):
    fig, axs = plt.subplots(2, 5, figsize=(20, 6))
    fig.suptitle("Hysteresis per material")

    for idx, material_set in enumerate(dataset):
        row_idx = 0 if idx < 5 else 1
        col_idx = idx % 5

        frequency_set = material_set.at_frequency(frequency)
        for sequence_idx in range(frequency_set.B.shape[0]):
            B_values = frequency_set.B[sequence_idx, :]
            H_values = frequency_set.H[sequence_idx, :]
            axs[row_idx, col_idx].plot(B_values / jnp.max(jnp.abs(B_values)), H_values / jnp.max(jnp.abs(H_values)), alpha=.7)

        axs[row_idx, col_idx].set_title(material_set.material_name + " at " + str(frequency_set.frequency / 1e3) + " kHz")

    # Normalized B is on the x-axis and normalized H on the y-axis; both are
    # dimensionless after the scaling. Label only the outer panels.
    for ax in axs[-1]:
        ax.set_xlabel("B / max|B|")

    for ax in axs[:, 0]:
        ax.set_ylabel("H / max|H|")

    for ax in axs.flat:
        ax.grid()

    fig.tight_layout()
    plt.show()

## Local behavior:

### Exemplary Subloops at 50 kHz

In [None]:
# HB curves: samples 1000-1999 of sequence 125 of the first frequency set of every material.

fig, axs = plt.subplots(2, 5, figsize=(20, 6))
fig.suptitle("Exemplary short trajectory per material for 50 kHz")

for idx, material_set in tqdm.tqdm(enumerate(dataset)):
    row_idx = 0 if idx < 5 else 1
    col_idx = idx % 5

    B_values = material_set[0].B[125, 1000:2000]
    H_values = material_set[0].H[125, 1000:2000]
    axs[row_idx, col_idx].plot(B_values, H_values)
    axs[row_idx, col_idx].set_title(material_set.material_name)

# B is plotted on the x-axis and H on the y-axis. Label only the outer panels:
# x labels on the bottom row, y labels on the left column.
for ax in axs[-1]:
    ax.set_xlabel("B in Vs/m^2")

for ax in axs[:, 0]:
    ax.set_ylabel("H in A/m")

for ax in axs.flat:
    ax.grid()

fig.tight_layout()
plt.show()

### Sequence starts (10 steps)

In [None]:
# HB curves: only the first 10 samples of every sequence, one figure per frequency.

for frequency in tqdm.tqdm(dataset[0].frequencies):
    fig, axs = plt.subplots(2, 5, figsize=(20, 6))
    fig.suptitle("Hysteresis per material")

    for idx, material_set in enumerate(dataset):
        row_idx = 0 if idx < 5 else 1
        col_idx = idx % 5

        frequency_set = material_set.at_frequency(frequency)
        for sequence_idx in range(frequency_set.B.shape[0]):
            B_values = frequency_set.B[sequence_idx, :10]
            H_values = frequency_set.H[sequence_idx, :10]
            axs[row_idx, col_idx].plot(B_values, H_values, alpha=.7)

        axs[row_idx, col_idx].set_title(material_set.material_name + " at " + str(frequency_set.frequency / 1e3) + " kHz")

    # B is plotted on the x-axis and H on the y-axis. Label only the outer panels:
    # x labels on the bottom row, y labels on the left column.
    for ax in axs[-1]:
        ax.set_xlabel("B in Vs/m^2")

    for ax in axs[:, 0]:
        ax.set_ylabel("H in A/m")

    for ax in axs.flat:
        ax.grid()

    fig.tight_layout()
    plt.show()

## Exemplary Time series:

In [None]:
# H(t) and B(t) curves
# Sequence k (k = 0..9) of the first frequency set is shown on its k-th window of
# 1000 samples, one figure per k.

sequence_indices = jnp.arange(10)
window_starts = 1_000 * sequence_indices
window_ends = window_starts + 1_000

for seq_idx, start, end in zip(sequence_indices, window_starts, window_ends):

    fig, axs = plt.subplots(2, 5, figsize=(20, 6))
    fig.suptitle("Exemplary short trajectory per material for 50 kHz")

    for idx, material_set in tqdm.tqdm(enumerate(dataset)):
        panel = axs[int(idx >= 5), idx % 5]
        first_frequency_set = material_set[0]

        # Raw (unscaled) signals over the sample index within the window.
        panel.plot(first_frequency_set.H[seq_idx, start:end], label="H")
        panel.plot(first_frequency_set.B[seq_idx, start:end], label="B")
        panel.set_title(material_set.material_name)

    for ax in axs.flat:
        ax.grid()
        ax.legend()

    fig.tight_layout()
    plt.show()

In [None]:
# Normalized H(t) and B(t) curves
# Same windows as for the raw time series; each signal is scaled by its maximum
# absolute value inside the plotted window.

sequence_indices = jnp.arange(10)
window_starts = 1_000 * sequence_indices
window_ends = window_starts + 1_000

for seq_idx, start, end in zip(sequence_indices, window_starts, window_ends):

    fig, axs = plt.subplots(2, 5, figsize=(20, 6))
    fig.suptitle("Exemplary short trajectory per material for 50 kHz")

    for idx, material_set in tqdm.tqdm(enumerate(dataset)):
        panel = axs[int(idx >= 5), idx % 5]
        first_frequency_set = material_set[0]

        H_window = first_frequency_set.H[seq_idx, start:end]
        B_window = first_frequency_set.B[seq_idx, start:end]

        panel.plot(H_window / jnp.max(jnp.abs(H_window)), label="H")
        panel.plot(B_window / jnp.max(jnp.abs(B_window)), label="B")
        panel.set_title(material_set.material_name)

    for ax in axs.flat:
        ax.grid()
        ax.legend()

    fig.tight_layout()
    plt.show()

In [None]:
# Normalized H(t) and B(t) curves
# Same windows as before, but each signal is scaled by the maximum absolute value
# of its whole sequence rather than of the plotted window.

sequence_indices = jnp.arange(10)
window_starts = 1_000 * sequence_indices
window_ends = window_starts + 1_000

for seq_idx, start, end in zip(sequence_indices, window_starts, window_ends):

    fig, axs = plt.subplots(2, 5, figsize=(20, 6))
    fig.suptitle("Exemplary short trajectory per material for 50 kHz")

    for idx, material_set in tqdm.tqdm(enumerate(dataset)):
        panel = axs[int(idx >= 5), idx % 5]
        first_frequency_set = material_set[0]

        H_sequence = first_frequency_set.H[seq_idx, :]
        B_sequence = first_frequency_set.B[seq_idx, :]

        panel.plot(first_frequency_set.H[seq_idx, start:end] / jnp.max(jnp.abs(H_sequence)), label="H")
        panel.plot(first_frequency_set.B[seq_idx, start:end] / jnp.max(jnp.abs(B_sequence)), label="B")
        panel.set_title(material_set.material_name)

    for ax in axs.flat:
        ax.grid()
        ax.legend()

    fig.tight_layout()
    plt.show()

- Sign changes are always reflected as sign changes in the other component, i.e., a sign change in B always occurs together with a sign change in H.
- It seems that H reacts to the changes in B only after the change has already occurred
- **you have to be able to make simplifications/regularizations based on this**
- Is it easier to predict $\Delta H$ instead of the full value?

## Subsampling:
- the signals are heavily oversampled and can be drastically reduced without loss of information

In [None]:
# Normalized H(t) and B(t) curves: every 10th sample compared with the
# full-resolution signal on a common time axis.

# Sampling period in seconds. `^` is bitwise XOR in Python (10^6 == 12), so the
# power of ten has to be written with `**`.
# NOTE(review): this assumes a sampling rate of 16 MHz - confirm against the data description.
tau = 1 / (16 * 10**6)

window_length = 300
subsampling_factor = 10

for frequency in tqdm.tqdm(dataset[0].frequencies):
    # Two windows of 300 samples: sequence 0 from sample 0, sequence 1 from sample 100.
    for seq_idx, start in zip(range(2), range(0, 200, 100)):
        end = start + window_length

        fig, axs = plt.subplots(2, 5, figsize=(20, 6))
        # Frequencies are stored in Hz; convert for the title.
        fig.suptitle(f"Exemplary short trajectory per material for {float(frequency) / 1e3:g} kHz")

        for idx, material_set in tqdm.tqdm(enumerate(dataset)):
            row_idx = 0 if idx < 5 else 1
            col_idx = idx % 5

            frequency_set = material_set.at_frequency(frequency)

            # Both curves are scaled by the maximum absolute value of the whole sequence.
            H_scale = jnp.max(jnp.abs(frequency_set.H[seq_idx, :]))
            B_scale = jnp.max(jnp.abs(frequency_set.B[seq_idx, :]))

            for step, label_suffix in ((subsampling_factor, "_subsampled"), (1, "")):
                B_values = frequency_set.B[seq_idx, start:end:step]
                H_values = frequency_set.H[seq_idx, start:end:step]

                # Time stamps of the samples that were actually kept: sample k of the
                # subsampled signal was taken `k * step` sampling periods into the window.
                t = np.arange(H_values.shape[0]) * step * tau

                axs[row_idx, col_idx].plot(t, H_values / H_scale, label="H" + label_suffix)
                axs[row_idx, col_idx].plot(t, B_values / B_scale, label="B" + label_suffix)

            axs[row_idx, col_idx].set_title(material_set.material_name)

        for ax in axs.flat:
            ax.grid()
            ax.legend()

        fig.tight_layout()
        plt.show()

Does the behavior look similar if I subsample the higher frequencies?

How strong is the influence of frequency on the dynamics?