In [5]:
import pandas as pd
import os
import glob
import numpy as np
from sklearn.svm import SVR
from sklearn.model_selection import GridSearchCV
import matplotlib.pyplot as plt

Import all data as one dataframe

In [6]:
# Directory that holds the training CSV files.
data_dir = 'trainingdata_new'

# glob returns matches in arbitrary, OS-dependent order. Sort them so the row
# order of the combined frame (which the train/validation split depends on)
# is the same on every run and every machine.
all_data = sorted(glob.glob(os.path.join(data_dir, "*.csv")))
if not all_data:
    # pd.concat would otherwise fail with a vague "No objects to concatenate".
    raise FileNotFoundError(f"No CSV files found in {data_dir!r}")

# Stack every file into one frame with a fresh 0..n-1 index.
combined_data = pd.concat([pd.read_csv(file) for file in all_data], ignore_index=True)

# Display only: the result is not assigned, so `combined_data` itself keeps
# the "Unnamed: 0" column.
combined_data.drop(columns=["Unnamed: 0"])

Unnamed: 0,Wavelength,V1,V2,Gain,Out
0,495,0.050000,0.050000,4,0.072094
1,495,0.130612,0.050000,4,0.070441
2,495,0.211224,0.050000,4,0.072832
3,495,0.291837,0.050000,4,0.073059
4,495,0.372449,0.050000,4,0.073020
...,...,...,...,...,...
1395,590,3.677551,0.372449,4,0.061949
1396,590,3.758163,0.291837,4,0.062184
1397,590,3.838776,0.211224,4,0.063187
1398,590,3.919388,0.130612,4,0.063203


Adjusts output voltage for the gain factor

In [7]:
# Multiply each output voltage by its gain, as one vectorized column operation
# instead of a row-by-row .loc loop.
# NOTE: this overwrites 'Out' in place, so re-running this cell without
# reloading the data applies the gain a second time.
combined_data['Out'] = combined_data['Out'] * combined_data['Gain']

Fitting model as shown in svm_regression_test.ipynb (will go into further detail)

In [68]:
# Inputs: the two voltage columns. Target: the output voltage.
x = combined_data['V1'].to_numpy()
y = combined_data['V2'].to_numpy()
# The loaded frame has no 'Gain Volts' column (its columns are Wavelength, V1,
# V2, Gain, Out), so read the output voltage from 'Out'. Use a float dtype
# rather than object, since this is a numeric regression target.
z = combined_data['Out'].to_numpy(dtype=float)

# Feature matrix of shape (n_samples, 2): one (V1, V2) pair per row.
X = np.column_stack((x, y))

# 5-fold cross-validated search over the RBF-kernel SVR hyperparameters.
param_grid = {'C': [0.1, 1, 10, 100], 'gamma': [0.001, 0.01, 0.1, 1]}
grid_search = GridSearchCV(SVR(kernel='rbf'), param_grid, cv=5)
grid_search.fit(X, z)
best_c = grid_search.best_params_['C']
best_gamma = grid_search.best_params_['gamma']

# GridSearchCV (refit=True by default) has already refit the best parameter
# combination on the full data set, so reuse that estimator instead of
# fitting an identical SVR a second time.
model = grid_search.best_estimator_

# Evaluation grid: 500 x 500 points covering the data range padded by 1 on
# each side of both axes.
x1_min, x1_max = X[:, 0].min() - 1, X[:, 0].max() + 1
x2_min, x2_max = X[:, 1].min() - 1, X[:, 1].max() + 1
xx1, xx2 = np.meshgrid(np.linspace(x1_min, x1_max, 500),
                       np.linspace(x2_min, x2_max, 500))
xx_input = np.column_stack((xx1.ravel(), xx2.ravel()))

# Model prediction at every grid point (flat array, one value per point).
y_pred = model.predict(xx_input)

# Attempting Neural Network Fitting
The above fit is a bit rough, and part of the problem is that it's wavelength-agnostic, even though our data clearly has a wavelength dependency. The reason it's wavelength-agnostic is that (from what I can gather) getting the predictions we'd like from a 4D SVM regressor is difficult. The current goal is to give two inputs (wavelength and desired polarization angle) and receive the input voltages needed for the LCVRs to produce that angle. Therefore, a neural net may be more apt to fit this data.

In [1]:
## Test neural network code created with Gemini (modified slightly by me)

import tensorflow as tf
from tensorflow.keras import layers

# Select columns by name rather than by position, so the selection does not
# silently change if a column is added to or removed from the frame.
# Note x is input LCVR Volts, and Y is wavelength, output voltage
x = combined_data[['V1', 'V2']]
y = combined_data[['Wavelength', 'Out']]

# Convert to float32 tensors (mixed int/float columns become one float dtype).
X = tf.convert_to_tensor(x.to_numpy(dtype='float32'))
Y = tf.convert_to_tensor(y.to_numpy(dtype='float32'))

# Split into training and validation sets
# Splitting ratio (e.g., 80% for training)
split_ratio = 0.8

# Number of rows that go into the training set
split_index = int(split_ratio * len(X))

# Shuffle the row indices before splitting. The rows are in file order, so a
# plain head/tail slice would validate only on the last-loaded rows. The
# fixed seed keeps the split reproducible.
shuffled_idx = np.random.default_rng(0).permutation(len(X))
train_idx = shuffled_idx[:split_index]
val_idx = shuffled_idx[split_index:]

X_train = tf.gather(X, train_idx)
X_val = tf.gather(X, val_idx)
Y_train = tf.gather(Y, train_idx)
Y_val = tf.gather(Y, val_idx)

# Model Definition
model = tf.keras.Sequential([
  layers.Dense(10, activation='relu', input_shape=(2,)),  # Hidden layer
  layers.Dense(2)  # Output layer with 2 neurons
])

# Compilation
model.compile(optimizer='adam', loss='mean_squared_error')

# Training
model.fit(X_train, Y_train, epochs=20, validation_data=(X_val, Y_val))


ModuleNotFoundError: No module named 'tensorflow'

In [None]:
# Prediction
# The network is built with input_shape=(2,), so each sample must have exactly
# two features, in the same order used for training: [V1, V2].
new_input = np.array([[1.0, 2.0]])  # Input with 2 known features
prediction = model.predict(new_input)
print(prediction)

In [25]:
import tensorflow as tf
from tensorflow.keras import layers

# Select columns by name rather than by position, so the selection does not
# silently change if a column is added to or removed from the frame.
x = combined_data[['V1', 'V2']]            # inputs
y = combined_data[['Wavelength', 'Out']]   # targets

# Convert to float32 tensors (mixed int/float columns become one float dtype).
X = tf.convert_to_tensor(x.to_numpy(dtype='float32'))
Y = tf.convert_to_tensor(y.to_numpy(dtype='float32'))

In [27]:
# Preview the first five rows of the input frame.
x.head(n=5)

Unnamed: 0,V1,V2
0,0.05,0.05
1,0.130612,0.05
2,0.211224,0.05
3,0.291837,0.05
4,0.372449,0.05
