# Build a Kriging surrogate on some dummy prop data

In [13]:
import pandas as pd
import numpy as np
import plotly.graph_objects as go
import dummy_prop_example
from smt.sampling_methods import FullFactorial
from smt.surrogate_models import KRG
from sklearn.model_selection import train_test_split 
import sys
from pathlib import Path
sys.path[0] = str(Path(sys.path[0]).parent)
# from unipy import surrogate_model

%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


**First let us consider a 'low fidelity' dataset. We could build a full factorial of data if this is cheap, so lets pretend to do that.**

We also may choose to remove some cases, since we can't always successfully manage to build a full factorial set of structured data. 

Let us set up the sampling limits:

*Note:* that we use the kwarg `clip=True` in order for the number of points to give a full grid.

In [14]:
# setup each limit
discangle_limits = [-90.0, 90.0]
propspeed_limits = [400.0, 1200.0]
airspeed_limits = [0.0, 60.0]
# and we group them into a list
xlimits = np.array([discangle_limits, airspeed_limits, propspeed_limits])
# and make the full factorial sampling
lf_sampling = FullFactorial(xlimits=xlimits, clip=True)
number_of_lf_samples = 200
lf_independent = lf_sampling(number_of_lf_samples)
print(f"Actual number of samples generated is: {lf_independent[:, 0].shape[0]}")

Actual number of samples generated is: 216


**now we can plot the inputs, just to see what we have achieved**

In [3]:
fig = go.Figure(
    data=[
        go.Scatter3d(
            x=lf_independent[:, 0],
            y=lf_independent[:, 1],
            z=lf_independent[:, 2],
            mode="markers",
            marker=dict(color="LightSkyBlue", size=4),
        )
    ]
)

fig.update_layout(
    scene=dict(
        xaxis_title="disc angle [deg]",
        yaxis_title="airspeed [m/s]",
        zaxis_title="prop speed [RPM]",
    ),
)

fig.show()


**It is now time to build a Pandas DataFrame, containg all of this data**

We don't really need to do this to build a Kirging surrogate, however we are doing this as it is common that the data won't just come out as a tidy NumPy array, and instead will be loads from some .csv file or similar.

*Note:* that the arguments after the sampling points are used to caluclate the dependent variable and are unique to the propeller example we are running.

In [16]:
lf_data_df: pd.DataFrame = dummy_prop_example.lf_data(
    lf_independent[:, 0],
    lf_independent[:, 1],
    lf_independent[:, 2],
    1000.0,
    4000.0,
)

display(lf_data_df)

Unnamed: 0,airspeed,discangle,propspeed,load
0,0.0,-90.0,400.0,640.0
1,0.0,-90.0,560.0,1254.4
2,0.0,-90.0,720.0,2073.6
3,0.0,-90.0,880.0,3097.6
4,0.0,-90.0,1040.0,4326.4
...,...,...,...,...
211,60.0,90.0,560.0,12774.4
212,60.0,90.0,720.0,13593.6
213,60.0,90.0,880.0,14617.6
214,60.0,90.0,1040.0,15846.4


**At this stage, it is probably worthwhile to see what the data looks like**

*Note:* that this data and the trends are made up, the trends are also made up

In [17]:
rpms = np.unique(lf_data_df.propspeed.to_numpy())
rpm = rpms[-1]
plot_trend_df = lf_data_df[lf_data_df.propspeed == rpm]

fig = go.Figure(
    data=[
        go.Scatter3d(
            x=plot_trend_df.discangle,
            y=plot_trend_df.airspeed,
            z=plot_trend_df.load,
            mode="markers",
            marker=dict(
                color="LightSkyBlue", size=4),
        )
    ]
)

fig.update_layout(
    scene=dict(
        xaxis_title="disc angle [deg]",
        yaxis_title="airspeed [m/s]",
        zaxis_title="thrust [N]",
    ),
    title=f"Prop speed is {rpm} RPM",
)


**Now we can build the Kirging surrogate model, from the DataFrame**

In [6]:
# we have yet to explore theta0, using default from docs
krg_sm = KRG(theta0=[1e-2])

**Lets now write a function which can split the data using train-test split from sklearn**

We probably wouldn't use this for the low-fidelity data, but it might come in useful later anyway. 

*Note:* that the purpose here is to give some understanding before the *proper* code is written. This is just a first looksee at what we could do.

In [7]:
def prep_data(
    df: pd.DataFrame, headers: list[str]
) -> np.ndarray:
    """ Prepare the data for surrogate model
    Prepares an np.ndarray[nt, nx] for the Kirger, where nx is in
    the order of the specified headers
    """
    return np.asarray([df[h].to_numpy() for h in headers]).T


**Now let us make the Kirging, without any thought to validation**

In [8]:
lf_x_data = prep_data(lf_data_df, ["discangle", "airspeed", "propspeed"])
lf_y_data = prep_data(lf_data_df, ["load"])
krg_sm.set_training_values(lf_x_data, lf_y_data)

**Now we train the model**

In [9]:
krg_sm.train()

___________________________________________________________________________
   
                                  Kriging
___________________________________________________________________________
   
 Problem size
   
      # training points.        : 216
   
___________________________________________________________________________
   
 Training
   
   Training ...
   Training - done. Time (sec):  0.5306561


**Now let us make some points for interpolation**

In [10]:
# I like to use non-equal dimensions, it helps to make sure plotting is correct!
#n_propspeed = 20
n_discangle = 22
n_airspeed = 21
# we use `*_vec_i` to hint that it's a 1D vector or interp points
discangle_vec_i = np.linspace(-90, 90, n_discangle)
airspeed_vec_i = np.linspace(0, 60, n_airspeed)
plot_rpm = rpms[-1]
propspeed_vec_i = np.asarray(plot_rpm)
# and now we mesh grid, so that we can make a surface
# note that we use `*_mat_i` to show that it's not longer a vector
[discangle_mat_i, airspeed_mat_i, propspeed_mat_i] = np.meshgrid(
    discangle_vec_i, airspeed_vec_i, propspeed_vec_i
)

# data to interpolate
x_data_interp = np.asarray(
    [
        discangle_mat_i.flatten(),
        airspeed_mat_i.flatten(),
        propspeed_mat_i.flatten(),
    ]
).T
print(f"Shape of the data to interpolate is {x_data_interp.shape}")

y_data_interp = krg_sm.predict_values(x_data_interp)


Shape of the data to interpolate is (462, 3)
___________________________________________________________________________
   
 Evaluation
   
      # eval points. : 462
   
   Predicting ...
   Predicting - done. Time (sec):  0.0031981
   
   Prediction time/pt. (sec) :  0.0000069
   


**Now we can finally plot the data**

*Note:* that we first need to reshape the data

In [11]:
y_data_interp_plt = y_data_interp.reshape(n_airspeed, n_discangle)

print(y_data_interp_plt.shape)
print(airspeed_mat_i[:, :, 0].shape)
print(discangle_mat_i[:, :, 0].shape)


(21, 22)
(21, 22)
(21, 22)


In [12]:
fig = go.Figure(
    data=[
        go.Surface(
            z=y_data_interp_plt, y=airspeed_mat_i[:, :, 0], x=discangle_mat_i[:, :, 0]
        ),
        go.Scatter3d(
            x=plot_trend_df.discangle,
            y=plot_trend_df.airspeed,
            z=plot_trend_df.load,
            mode="markers",
            marker=dict(color="LightSkyBlue", size=4),
        ),
    ]
)

fig.update_layout(
    title=f"Interp at prop speed = {plot_rpm} RPM",
    autosize=False,
    width=500,
    height=500,
    margin=dict(l=65, r=50, b=65, t=90),
)

fig.show()


**From the point of view of fitting, this is a very simple example.**

We have a wonderfully smooth funciton, and there is zero noise. In reality we will likely seem some zones which require some smoothing, either due to noise in measurements, or solvers struggling with challenging cases. We will not produce a 'High fidelity data set', which again will be manafactured, however we will put in some noise and some mild discontinuities in the function, perhaps something that will represent stall.

**Let's generate the 'high fidelity' data**

In [18]:
hd_data_df: pd.DataFrame = dummy_prop_example.hf_data(
    lf_independent[:, 0],
    lf_independent[:, 1],
    lf_independent[:, 2],
    1000.0,
    3700.0,
)

display(lf_data_df)

Unnamed: 0,airspeed,discangle,propspeed,load
0,0.0,-90.0,400.0,640.0
1,0.0,-90.0,560.0,1254.4
2,0.0,-90.0,720.0,2073.6
3,0.0,-90.0,880.0,3097.6
4,0.0,-90.0,1040.0,4326.4
...,...,...,...,...
211,60.0,90.0,560.0,12774.4
212,60.0,90.0,720.0,13593.6
213,60.0,90.0,880.0,14617.6
214,60.0,90.0,1040.0,15846.4


**Now lets compare the two datasets, at a common, fixed, propeller speed**