In [2]:
import pandas as pd
import os
import glob
import numpy as np
from sklearn.svm import SVR
from sklearn.model_selection import GridSearchCV
import matplotlib.pyplot as plt
from tensorflow.keras.layers import Rescaling

2024-03-29 14:15:39.103999: I external/local_tsl/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2024-03-29 14:15:39.185906: I external/local_tsl/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2024-03-29 14:15:39.560415: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


Import all data as one dataframe

In [5]:
data_dir = 'training_data/trainingdata_v2/'
all_data = glob.glob(os.path.join(data_dir, "*.csv"))
combined_data = pd.concat([pd.read_csv(file) for file in all_data], ignore_index=True)
combined_data.drop(["Unnamed: 0"], axis = 1)

Unnamed: 0,Wavelength,V1,V2,Gain,Out
0,594.5,0.050000,0.050000,1,-0.000595
1,594.5,0.159722,0.050000,1,-0.000549
2,594.5,0.269444,0.050000,1,-0.000527
3,594.5,0.379167,0.050000,1,-0.000454
4,594.5,0.488889,0.050000,1,-0.000447
...,...,...,...,...,...
1187,543.1,3.816279,0.233721,1,0.000996
1188,543.1,3.862209,0.187791,1,0.001017
1189,543.1,3.908140,0.141860,1,0.001049
1190,543.1,3.954070,0.095930,1,0.001094


Adjusts output voltage for the gain factor

In [3]:
combined_data.loc[0,'Out']
new_out = []
for i in range(len(combined_data)):
    adj_out = combined_data.loc[i,'Out']*combined_data.loc[i,'Gain']
    new_out.append(adj_out)
combined_data['Out'] = new_out

Fitting model as shown in svm_regression_test.ipynb (will go into further detail)

In [68]:
x = combined_data['V1'].to_numpy()
y = combined_data['V2'].to_numpy()
z = combined_data['Gain Volts'].to_numpy(dtype = 'object')

X = np.column_stack((x, y))

param_grid = {'C': [0.1, 1, 10, 100], 'gamma': [0.001, 0.01, 0.1, 1]}
grid_search = GridSearchCV(SVR(kernel='rbf'), param_grid, cv=5)
grid_search.fit(X, z)
best_c = grid_search.best_params_['C']
best_gamma = grid_search.best_params_['gamma']

model = SVR(kernel='rbf', C=best_c, gamma=best_gamma) 
model.fit(X,z)
x1_min, x1_max = X[:, 0].min() - 1, X[:, 0].max() + 1
x2_min, x2_max = X[:, 1].min() - 1, X[:, 1].max() + 1 
xx1, xx2 = np.meshgrid(np.linspace(x1_min, x1_max, 500),
                       np.linspace(x2_min, x2_max, 500))
xx_input = np.column_stack((xx1.ravel(), xx2.ravel()))
y_pred = model.predict(xx_input)

# Attempting Neural Network Fitting
The above fit is a bit rough, and part of the problem is that it's wavelength agnostic. Of course, we do have a wavelength dependency in our data. However, the reason it's wavelength agnostic is because (from what I can gather) getting the predictions we'd like from a 4D SVM regressor is difficult. The current goal is to give two inputs (wavelength and desired polarization angle) and receive the input voltages needed for the LCVR's to produce the angle. Therefore a neural net may be more apt to fit this data.

In [6]:
## Test neural network code created with Gemini (modified slightly by me)

import tensorflow as tf
from tensorflow.keras import layers

# Converts dataframe to tensor
# Note x is "input" wavelength, output voltage, Y is LCVR Volts
x = combined_data.iloc[:, [1, 5]]
y = combined_data.iloc[:, [2, 3]] 
X = tf.convert_to_tensor(x)
Y = tf.convert_to_tensor(y)

# Split into training and validation sets
# Splitting ratio (e.g., 80% for training)
split_ratio = 0.8

# Calculate the split index
split_index = int(split_ratio * len(X))

# Splitting the tensors
X_train = X[:split_index]
X_val = X[split_index:]
Y_train = Y[:split_index]
Y_val = Y[split_index:]

2024-03-29 14:17:53.428567: E external/local_xla/xla/stream_executor/cuda/cuda_driver.cc:282] failed call to cuInit: CUDA_ERROR_NO_DEVICE: no CUDA-capable device is detected


In [None]:

# Model Definition
model = tf.keras.Sequential([
  layers.Dense(10, activation='relu', input_shape=(2,)),  # Hidden layer
  layers.Dense(2)  # Output layer with 2 neurons
])

# Compilation
model.compile(optimizer='adam', loss='mean_squared_error')

# Training
model.fit(X_train, Y_train, epochs=1000, validation_data=(X_val, Y_val), verbose = 2)


In [7]:
# Prediction
new_input = tf.constant([[500,-.1]])  # Input with 2 known features
prediction = model.predict(new_input)
print(prediction) 

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
[[2.6466823  0.46177444]]


This is the first iteration. It's a bit better than SVM I believe but it could use some work. Now going to try tuning the model and such to better fit the data

First scaling the data. Going to try the scaling layer included in Tensorflow.keras. Will scale based on a range from 0 to 1 for both which means we need the max values for each of our inputs.

In [7]:
max_wavelength = x['Wavelength'].max()
min_output = x['Out'].min()
max_output = x['Out'].max()
output_range = max_output - min_output

model = tf.keras.Sequential([
  Rescaling(scale=[1./max_wavelength, 1./output_range], offset=[0, min_output]),
  layers.Dense(10, activation='relu', input_shape=(2,)),  # Hidden layer
  layers.Dense(2)  # Output layer with 2 neurons
])
model.compile(optimizer='adam', loss='mean_squared_error')
model.fit(X_train, Y_train, epochs=300, validation_data=(X_val, Y_val), verbose = 2)

Epoch 1/300


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


30/30 - 1s - 29ms/step - loss: 3.5568 - val_loss: 4.5585
Epoch 2/300
30/30 - 0s - 3ms/step - loss: 3.1740 - val_loss: 4.1254
Epoch 3/300
30/30 - 0s - 2ms/step - loss: 2.8661 - val_loss: 3.7544
Epoch 4/300
30/30 - 0s - 2ms/step - loss: 2.6006 - val_loss: 3.4365
Epoch 5/300
30/30 - 0s - 3ms/step - loss: 2.3837 - val_loss: 3.1541
Epoch 6/300
30/30 - 0s - 2ms/step - loss: 2.2084 - val_loss: 2.9087
Epoch 7/300
30/30 - 0s - 2ms/step - loss: 2.0739 - val_loss: 2.6978
Epoch 8/300
30/30 - 0s - 2ms/step - loss: 1.9721 - val_loss: 2.5312
Epoch 9/300
30/30 - 0s - 2ms/step - loss: 1.8965 - val_loss: 2.4009
Epoch 10/300
30/30 - 0s - 2ms/step - loss: 1.8457 - val_loss: 2.2937
Epoch 11/300
30/30 - 0s - 2ms/step - loss: 1.8122 - val_loss: 2.2090
Epoch 12/300
30/30 - 0s - 2ms/step - loss: 1.7856 - val_loss: 2.1365
Epoch 13/300
30/30 - 0s - 2ms/step - loss: 1.7623 - val_loss: 2.0870
Epoch 14/300
30/30 - 0s - 2ms/step - loss: 1.7495 - val_loss: 2.0395
Epoch 15/300
30/30 - 0s - 2ms/step - loss: 1.7348 - va

KeyboardInterrupt: 

This is somewhat helpful, as we have lower RMS error in less time. Now will try adding ReLU layers

In [22]:
max_wavelength = x['Wavelength'].max()
min_output = x['Out'].min()
max_output = x['Out'].max()
output_range = max_output - min_output

model = tf.keras.Sequential([
  Rescaling(scale=[1./max_wavelength, 1./output_range], offset=[0, min_output]),
  layers.Dense(10, activation='relu', input_shape=(2,)),
  layers.Dense(10, activation='relu', input_shape=(2,)),  # Hidden layer
  layers.Dense(2)  # Output layer with 2 neurons
])
model.compile(optimizer='adam', loss='mean_squared_error')
model.fit(X_train, Y_train, epochs=300, validation_data=(X_val, Y_val), verbose = 2)

Epoch 1/300
35/35 - 1s - 27ms/step - loss: 3.8983 - val_loss: 4.5996
Epoch 2/300
35/35 - 0s - 2ms/step - loss: 3.6428 - val_loss: 4.3448
Epoch 3/300
35/35 - 0s - 2ms/step - loss: 3.4032 - val_loss: 4.0079
Epoch 4/300
35/35 - 0s - 2ms/step - loss: 3.0920 - val_loss: 3.5524
Epoch 5/300
35/35 - 0s - 2ms/step - loss: 2.7010 - val_loss: 3.0128
Epoch 6/300
35/35 - 0s - 2ms/step - loss: 2.2983 - val_loss: 2.4757
Epoch 7/300
35/35 - 0s - 2ms/step - loss: 1.9639 - val_loss: 2.0556
Epoch 8/300
35/35 - 0s - 2ms/step - loss: 1.7760 - val_loss: 1.8476
Epoch 9/300
35/35 - 0s - 3ms/step - loss: 1.7307 - val_loss: 1.7846
Epoch 10/300
35/35 - 0s - 2ms/step - loss: 1.7247 - val_loss: 1.7780
Epoch 11/300
35/35 - 0s - 2ms/step - loss: 1.7223 - val_loss: 1.7763
Epoch 12/300
35/35 - 0s - 2ms/step - loss: 1.7201 - val_loss: 1.7689
Epoch 13/300
35/35 - 0s - 2ms/step - loss: 1.7175 - val_loss: 1.7754
Epoch 14/300
35/35 - 0s - 2ms/step - loss: 1.7150 - val_loss: 1.7651
Epoch 15/300
35/35 - 0s - 2ms/step - loss:

KeyboardInterrupt: 

This is even better and continually decreases the loss... Sometimes? I need to figure out how to make that more consistent

Rescaling input, target, and validation data. There should be an easier way but I wanted to have the scale factors stored as well for recovery later. (Maybe make this a function when fully implemented?)

NOTE: does not currently store scale factors actually but it does scale properly for now

In [9]:
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler(feature_range=(0, 1))

#Y training
rs_Y_train = tf.concat([
    tf.convert_to_tensor(scaler.fit(Y_train[:,0:1]).transform(Y_train[:, 0:1])),
    tf.convert_to_tensor(scaler.fit(Y_train[:,1:]).transform(Y_train[:, 1:]))
], axis=1)

# X training
rs_X_train = tf.concat([
    tf.convert_to_tensor(scaler.fit(X_train[:,0:1]).transform(X_train[:, 0:1])),
    tf.convert_to_tensor(scaler.fit(X_train[:,1:]).transform(X_train[:, 1:]))
], axis=1)

#Y validation
# Want same scaler on training and validation data
rs_Y_val = tf.concat([
    tf.convert_to_tensor(scaler.fit(Y_train[:,0:1]).transform(Y_val[:, 0:1])),
    tf.convert_to_tensor(scaler.fit(Y_train[:,1:]).transform(Y_val[:, 1:]))
], axis=1)

#X validation
rs_X_val = tf.concat([
    tf.convert_to_tensor(scaler.fit(X_train[:,0:1]).transform(X_val[:, 0:1])),
    tf.convert_to_tensor(scaler.fit(X_train[:,1:]).transform(X_val[:, 1:]))
], axis=1)

In [42]:
model = tf.keras.Sequential([
  layers.Dense(10, activation='relu', input_shape=(2,)),
  layers.Dropout(0.1),
  layers.Dense(10, activation='relu', input_shape=(2,)),  # Hidden layer
  layers.Dropout(0.1),
  layers.Dense(2)  # Output layer with 2 neurons
])
model.compile(optimizer='adam', loss='mean_squared_error')
model.fit(rs_X_train, rs_Y_train, epochs=2000, validation_data=(rs_X_val, rs_Y_val), verbose = 2)

Epoch 1/2000


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


35/35 - 1s - 29ms/step - loss: 0.2742 - val_loss: 0.2308
Epoch 2/2000
35/35 - 0s - 2ms/step - loss: 0.1651 - val_loss: 0.1698
Epoch 3/2000
35/35 - 0s - 2ms/step - loss: 0.1390 - val_loss: 0.1429
Epoch 4/2000
35/35 - 0s - 2ms/step - loss: 0.1371 - val_loss: 0.1321
Epoch 5/2000
35/35 - 0s - 3ms/step - loss: 0.1304 - val_loss: 0.1294
Epoch 6/2000
35/35 - 0s - 2ms/step - loss: 0.1250 - val_loss: 0.1274
Epoch 7/2000
35/35 - 0s - 2ms/step - loss: 0.1222 - val_loss: 0.1263
Epoch 8/2000
35/35 - 0s - 2ms/step - loss: 0.1215 - val_loss: 0.1251
Epoch 9/2000
35/35 - 0s - 2ms/step - loss: 0.1199 - val_loss: 0.1244
Epoch 10/2000
35/35 - 0s - 2ms/step - loss: 0.1167 - val_loss: 0.1236
Epoch 11/2000
35/35 - 0s - 2ms/step - loss: 0.1165 - val_loss: 0.1228
Epoch 12/2000
35/35 - 0s - 3ms/step - loss: 0.1136 - val_loss: 0.1216
Epoch 13/2000
35/35 - 0s - 3ms/step - loss: 0.1158 - val_loss: 0.1212
Epoch 14/2000
35/35 - 0s - 3ms/step - loss: 0.1152 - val_loss: 0.1208
Epoch 15/2000
35/35 - 0s - 3ms/step - los

KeyboardInterrupt: 

# Programmatically testing different network layers

Scaling the data has helped dramatically, but it would help to know what kind of layers would be useful. This is a little slow but will be a good way to empirically measure the loss for different functions. Just starting with two layers:

In [56]:
act_funcs = ['leaky_relu','relu','sigmoid','tanh']
losses = []
layernum = 0

for i in range(len(act_funcs)):
    for j in range (len(act_funcs)):
        layernum += 1
        print("Training Layer Combination " + str(layernum) + " out of " + str((len(act_funcs))**2))
        model = tf.keras.Sequential([
        layers.Dense(2),
        layers.Dense(10, activation=act_funcs[i], input_shape=(2,)),
        layers.Dropout(0.1),
        layers.Dense(10, activation='relu', input_shape=(2,)),  # Hidden layer
        layers.Dropout(0.1),
        layers.Dense(2)  # Output layer with 2 neurons
        ])
        model.compile(optimizer='adam', loss='mean_squared_error')
        history = model.fit(rs_X_train, rs_Y_train, epochs=300, validation_data=(rs_X_val, rs_Y_val), verbose = 0)
        final_epoch_loss = history.history['loss'][-1]
        losses.append({'Layer 1' : act_funcs[i], 'Layer 2': act_funcs[j], 'Loss': final_epoch_loss})

Training Layer Combination 1 out of 16


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Training Layer Combination 2 out of 16
Training Layer Combination 3 out of 16
Training Layer Combination 4 out of 16
Training Layer Combination 5 out of 16
Training Layer Combination 6 out of 16
Training Layer Combination 7 out of 16
Training Layer Combination 8 out of 16
Training Layer Combination 9 out of 16
Training Layer Combination 10 out of 16
Training Layer Combination 11 out of 16
Training Layer Combination 12 out of 16
Training Layer Combination 13 out of 16
Training Layer Combination 14 out of 16
Training Layer Combination 15 out of 16
Training Layer Combination 16 out of 16


In [60]:
lossframe = pd.DataFrame(losses)
lossframe

Unnamed: 0,Layer 1,Layer 2,Loss,Layer 1 Neurons,Layer 2 Neurons
0,leaky_relu,leaky_relu,0.087611,,
1,leaky_relu,relu,0.091828,,
2,leaky_relu,sigmoid,0.08981,,
3,leaky_relu,tanh,0.089719,,
4,relu,leaky_relu,0.091886,,
5,relu,relu,0.092415,,
6,relu,sigmoid,0.094143,,
7,relu,tanh,0.086029,,
8,sigmoid,leaky_relu,0.104498,,
9,sigmoid,relu,0.10468,,


Right now it'slooking like relu -> tanh may be the way to go. Now testing number of neurons. Also basing sequential layers on initial layer size as a 'filtering' method

In [91]:
losses = []
max_neurons = 64
min_neurons = 30
layernum = 0

for i in range(4,8):
    max_neurons = 2**i
    layernum += 1
    print("Training Layer Combination " + str(layernum) + " out of " + str(max_neurons - min_neurons))
    model = tf.keras.Sequential([
    layers.Dense(2),
    layers.Dense(max_neurons, activation='relu', input_shape=(2,)),
    layers.Dropout(0.1),
    layers.Dense(int(max_neurons/2), activation='tanh', input_shape=(2,)), 
    layers.Dropout(0.1),
    layers.Dense(int(max_neurons/4), activation='relu', input_shape=(2,)),
    layers.Dropout(0.1),
    layers.Dense(2) 
    ])
    model.compile(optimizer='adam', loss='mean_squared_error')
    history = model.fit(rs_X_train, rs_Y_train, epochs=300, validation_data=(rs_X_val, rs_Y_val), verbose = 0)
    final_epoch_loss = history.history['loss'][-1]
    losses.append({'Layer 1 Neurons' : i, 'Layer 2 Neurons': str(i/2),'Layer 3 Neurons': str(i/4), 'Loss': final_epoch_loss})

Training Layer Combination 1 out of -14
Training Layer Combination 2 out of 2
Training Layer Combination 3 out of 34
Training Layer Combination 4 out of 98


In [92]:
lossframe = pd.DataFrame(losses)
lossframe

Unnamed: 0,Layer 1 Neurons,Layer 2 Neurons,Layer 3 Neurons,Loss
0,4,2.0,1.0,0.094603
1,5,2.5,1.25,0.084221
2,6,3.0,1.5,0.078768
3,7,3.5,1.75,0.074874


In [41]:
rs_X_train_shape = tf.reshape(rs_X_train, [len(rs_X_train),1, 2])
rs_Y_train_shape = tf.reshape(rs_Y_train, [len(rs_Y_train),1, 2])
rs_X_val_shape = tf.reshape(rs_X_val, [len(rs_X_val),1, 2])
rs_Y_val_shape = tf.reshape(rs_Y_val, [len(rs_Y_val),1, 2])
rs_X_train_shape

<tf.Tensor: shape=(953, 1, 2), dtype=float64, numpy=
array([[[1.        , 0.9226546 ]],

       [[1.        , 0.92469183]],

       [[1.        , 0.92565864]],

       ...,

       [[0.43267108, 0.95759815]],

       [[0.43267108, 0.95773627]],

       [[0.43267108, 0.95797797]]])>

In [46]:
max_neurons = 2**10
drop_time = 0.2
model = tf.keras.Sequential([
        layers.Dense(2),
        layers.Conv1D(filters=32, kernel_size=1, activation='relu', input_shape=(1,2)),
        layers.Dense(max_neurons, activation='relu', input_shape=(2,)),
        layers.Dropout(drop_time),
        layers.Dense(int(max_neurons/2), activation='tanh', input_shape=(2,)), 
        layers.Dropout(drop_time),
        layers.Dense(int(max_neurons/4), activation='relu', input_shape=(2,)),
        layers.Dropout(drop_time),
        layers.Dense(2) 
        ])
model.compile(optimizer='adam', loss='mean_squared_error')
history = model.fit(rs_X_train_shape, rs_Y_train_shape, epochs=2000, validation_data=(rs_X_val_shape, rs_Y_val_shape), verbose = 2)

Epoch 1/2000


2024-03-29 20:15:24.023063: W external/local_tsl/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 67108864 exceeds 10% of free system memory.
2024-03-29 20:15:24.059737: W external/local_tsl/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 67108864 exceeds 10% of free system memory.
2024-03-29 20:15:24.096446: W external/local_tsl/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 67108864 exceeds 10% of free system memory.
2024-03-29 20:15:24.136460: W external/local_tsl/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 67108864 exceeds 10% of free system memory.
2024-03-29 20:15:24.172665: W external/local_tsl/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 67108864 exceeds 10% of free system memory.


30/30 - 3s - 112ms/step - loss: 0.1266 - val_loss: 0.1576
Epoch 2/2000
30/30 - 1s - 39ms/step - loss: 0.1089 - val_loss: 0.1699
Epoch 3/2000
30/30 - 1s - 40ms/step - loss: 0.1063 - val_loss: 0.1887
Epoch 4/2000
30/30 - 1s - 40ms/step - loss: 0.1005 - val_loss: 0.2246
Epoch 5/2000
30/30 - 1s - 40ms/step - loss: 0.0971 - val_loss: 0.1967
Epoch 6/2000


KeyboardInterrupt: 

Need to now undo scaling for predictions to actually know what's happening. This will *definitely* just be a function in lcvr_learning when this is all implemented there

In [120]:
pred_wave = [[480]]
pred_volts =[[0.03]]

rs_input = tf.concat([
    tf.convert_to_tensor(scaler.fit(X_train[:,0:1]).transform(pred_wave)),
    tf.convert_to_tensor(scaler.fit(X_train[:,1:]).transform(pred_volts))
], axis=1)

prediction = model.predict(rs_input)

rs_output = tf.concat([
    tf.convert_to_tensor(scaler.fit(Y_train[:,0:1]).inverse_transform([[prediction[0,0]]])),
    tf.convert_to_tensor(scaler.fit(Y_train[:,1:]).inverse_transform([[prediction[0,1]]]))
], axis=1)
print(rs_output) 

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step
tf.Tensor([[0.33869303 3.15758747]], shape=(1, 2), dtype=float64)


In [15]:
tf.convert_to_tensor(scaler.fit(Y_train[:,0:1]).inverse_transform([[.0687]]))

<tf.Tensor: shape=(1, 1), dtype=float64, numpy=array([[0.321365]])>

In [30]:
rs_X_train

<tf.Tensor: shape=(953, 2), dtype=float64, numpy=
array([[1.        , 0.9226546 ],
       [1.        , 0.92469183],
       [1.        , 0.92565864],
       ...,
       [0.43267108, 0.95759815],
       [0.43267108, 0.95773627],
       [0.43267108, 0.95797797]])>