# Active learning for the expansion joint

In [8]:
# Import required packages
from alcfd import *
import warnings
warnings.filterwarnings('ignore')

In [9]:
# Load training data
input_file = 'expansion_train.dat'
data = pd.read_csv(input_file, delimiter=' ')
data = data.rename(columns={'Backflow percent': 'backflow_percent'})

# Hold-out rule for the test set, expressed in the file's RAW units
# (exp_angle as read from the file; inlet_D presumably in metres, given the
# x1000 "to mm" conversion below -- confirm against the data file).
# The mask is built BEFORE the unit conversion on purpose: comparing the
# converted (mm) diameters against 0.0025 would make the diameter condition
# vacuous, so the split would be decided by the angle alone.
TEST_MIN_EXP_ANGLE = 50
TEST_MIN_INLET_D = 0.0025
is_test = (data['exp_angle'] > TEST_MIN_EXP_ANGLE) & (data['inlet_D'] > TEST_MIN_INLET_D)

# Convert units
data['angle'] = np.round(np.radians(data['exp_angle']), decimals=4)  # degrees -> radians
data['inlet_D'] = data['inlet_D'] * 1000  # converting to mm
data['inlet_V'] = data['inlet_V'] * 1000  # same x1000 scaling as inlet_D (presumably m/s -> mm/s)
data['Density'] = data['Density'] * 10 ** -6  # converting to g/mm3

# Isolate test data: one row per sample, columns ordered (inlet_D, angle, inlet_V)
test_data = data.loc[is_test]
X_test = test_data[['inlet_D', 'angle', 'inlet_V']].to_numpy(dtype=float)
y_test = test_data['backflow_percent'].to_numpy(dtype=float)
# Rest is the training pool (exact complement of the test mask)
train_data = data.loc[~is_test]

In [12]:
# What are the features and labels?
features = ['inlet_D', 'angle', 'inlet_V']
label = 'backflow_percent'

# Sweep settings: build training sets for 5, 10, ..., 20 actively learned
# points, repeating each setting N_RUNS times.
QUERY_COUNTS = np.arange(5, 25, 5)
N_RUNS = 5

for n_queries in QUERY_COUNTS:
    for run in range(N_RUNS):
        # Fresh initial sample and a fresh learner for every run.
        X_train, y_train, X_pool, y_pool = sample_initial_training_points(train_data, features, label)
        regressor_3d = active_learner(X_train, y_train)
        # Query n_queries points from the pool; X_test / y_test are passed
        # through to the helper (presumably for validation -- see alcfd).
        Xs, ys, X_val, y_val = query_data(n_queries, X_pool, y_pool, regressor_3d, X_test, y_test)
        # One output folder per (query count, run) combination.
        directory = create_train_dat(
            f'expansion_al_example/points{n_queries}_{run}/',
            Xs, ys, features, label,
            density=1060., viscosity=0.004,
        )
print('Done!')


Done!
