In [1]:
pip install pandas scikit-learn openpyxl

Collecting openpyxl
  Downloading openpyxl-3.1.5-py2.py3-none-any.whl.metadata (2.5 kB)
Collecting et-xmlfile (from openpyxl)
  Downloading et_xmlfile-2.0.0-py3-none-any.whl.metadata (2.7 kB)
Downloading openpyxl-3.1.5-py2.py3-none-any.whl (250 kB)
Downloading et_xmlfile-2.0.0-py3-none-any.whl (18 kB)
Installing collected packages: et-xmlfile, openpyxl
Successfully installed et-xmlfile-2.0.0 openpyxl-3.1.5
Note: you may need to restart the kernel to use updated packages.


In [2]:
import os

import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split

In [4]:
# Location of the Excel workbook. It can be overridden through the
# BUBBLES_DATASET_PATH environment variable so the notebook also runs on
# machines other than the author's; when the variable is unset, the original
# local path is used unchanged.
file_path = os.environ.get(
    'BUBBLES_DATASET_PATH',
    '/Users/zlatastefanovic/Desktop/Internship/BubblesDataset.xlsx',
)
# Load the workbook into a DataFrame (pandas' default sheet selection).
data = pd.read_excel(file_path)

In [5]:
# Plain-text preview of the first five rows of the full dataset.
print(data.head(n=5))

  Type of bubble  Pressure (bar)  KPi electrolyte concentration (M)  \
0             H2               1                                2.0   
1             H2               1                                2.0   
2             H2               1                                2.0   
3             H2               2                                2.0   
4             H2               2                                2.0   

   Current density (mA/cm2)  Number of bubbles (-)  \
0                      1.25               75.33333   
1                      1.88              309.66667   
2                      2.50             1392.33333   
3                      1.25              114.00000   
4                      1.88              353.33333   

   Stdev of Number of bubbles (-)  Diameter of bubbles (mm)  \
0                         7.76745                   0.12902   
1                        34.67468                   0.08179   
2                       105.26316                   0.05580

In [18]:
# Subset of rows whose 'Type of bubble' value is 'O2'.
O2data = data.loc[data['Type of bubble'].eq('O2')]

In [19]:
# Subset of rows whose 'Type of bubble' value is 'H2'.
H2data = data.loc[data['Type of bubble'].eq('H2')]

In [36]:
# Plain-text preview of the first five rows of the H2-only subset.
print(H2data.head(n=5))

  Type of bubble  Pressure (bar)  KPi electrolyte concentration (M)  \
0             H2               1                                2.0   
1             H2               1                                2.0   
2             H2               1                                2.0   
3             H2               2                                2.0   
4             H2               2                                2.0   

   Current density (mA/cm2)  Number of bubbles (-)  \
0                      1.25               75.33333   
1                      1.88              309.66667   
2                      2.50             1392.33333   
3                      1.25              114.00000   
4                      1.88              353.33333   

   Stdev of Number of bubbles (-)  Diameter of bubbles (mm)  \
0                         7.76745                   0.12902   
1                        34.67468                   0.08179   
2                       105.26316                   0.05580

In [6]:
# Predictors and target for the model fitted on every bubble type together.
X = data.loc[:, [
    'Pressure (bar)',
    'KPi electrolyte concentration (M)',
    'Current density (mA/cm2)',
    'Number of bubbles (-)',
    'Diameter of bubbles (mm)',
]]
y = data.loc[:, 'Optical loss (%)']

In [20]:
# Predictors and target restricted to the O2 subset.
XO2 = O2data.loc[:, [
    'Pressure (bar)',
    'KPi electrolyte concentration (M)',
    'Current density (mA/cm2)',
    'Number of bubbles (-)',
    'Diameter of bubbles (mm)',
]]
yO2 = O2data.loc[:, 'Optical loss (%)']

In [21]:
# Predictors and target restricted to the H2 subset.
XH2 = H2data.loc[:, [
    'Pressure (bar)',
    'KPi electrolyte concentration (M)',
    'Current density (mA/cm2)',
    'Number of bubbles (-)',
    'Diameter of bubbles (mm)',
]]
yH2 = H2data.loc[:, 'Optical loss (%)']

In [7]:
# Hold out 20% of the full dataset for evaluation; the fixed seed keeps the
# partition reproducible across runs.
(X_train, X_test,
 y_train, y_test) = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [22]:
# Hold out 20% of the H2 subset for evaluation; the fixed seed keeps the
# partition reproducible across runs.
(XH2_train, XH2_test,
 yH2_train, yH2_test) = train_test_split(
    XH2,
    yH2,
    test_size=0.2,
    random_state=42,
)

In [23]:
# Hold out 20% of the O2 subset for evaluation; the fixed seed keeps the
# partition reproducible across runs.
(XO2_train, XO2_test,
 yO2_train, yO2_test) = train_test_split(
    XO2,
    yO2,
    test_size=0.2,
    random_state=42,
)

In [9]:
# Ordinary least-squares model trained on the training split of all bubbles.
model = LinearRegression()
model.fit(X=X_train, y=y_train)

In [24]:
# Ordinary least-squares model trained on the H2 training split only.
modelH2 = LinearRegression()
modelH2.fit(X=XH2_train, y=yH2_train)

In [25]:
# Ordinary least-squares model trained on the O2 training split only.
modelO2 = LinearRegression()
modelO2.fit(X=XO2_train, y=yO2_train)

In [10]:
# Predictions of the all-bubbles model on its held-out rows.
y_pred = model.predict(X=X_test)

In [26]:
# Predictions of the O2 model on its held-out rows.
yO2_pred = modelO2.predict(X=XO2_test)

In [27]:
# Predictions of the H2 model on its held-out rows.
yH2_pred = modelH2.predict(X=XH2_test)

In [14]:
# Test-set error (MSE) and goodness of fit (R²) for the all-bubbles model.
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
r2 = r2_score(y_true=y_test, y_pred=y_pred)

In [28]:
# Test-set error (MSE) and goodness of fit (R²) for the H2 model.
mseH2 = mean_squared_error(y_true=yH2_test, y_pred=yH2_pred)
r2H2 = r2_score(y_true=yH2_test, y_pred=yH2_pred)

In [29]:
# Test-set error (MSE) and goodness of fit (R²) for the O2 model.
mseO2 = mean_squared_error(y_true=yO2_test, y_pred=yO2_pred)
r2O2 = r2_score(y_true=yO2_test, y_pred=yO2_pred)

In [31]:
# Report both test-set metrics of the all-bubbles model, one per line.
print(
    f'Mean Squared Error: {mse}',
    f'R² Score: {r2}',
    sep='\n',
)

Mean Squared Error: 5.291669678705896
R² Score: 0.6549751341300971


In [32]:
# Report both test-set metrics of the H2 model, one per line.
print(
    f'Mean Squared Error - H2: {mseH2}',
    f'R² Score - H2: {r2H2}',
    sep='\n',
)

Mean Squared Error - H2: 1.5524876559162588
R² Score - H2: 0.4531466300095093


In [33]:
# Report both test-set metrics of the O2 model, one per line.
print(
    f'Mean Squared Error - O2: {mseO2}',
    f'R² Score - O2: {r2O2}',
    sep='\n',
)

Mean Squared Error - O2: 3.0090220308487914
R² Score - O2: 0.7477834952442922


In [13]:
# Tabulate the fitted coefficients of the all-bubbles model. Row labels come
# from the feature names the estimator recorded at fit time
# (feature_names_in_) rather than from the current columns of X, so the table
# stays correctly labelled even if X is rebound to another feature set after
# the model was fitted.
coefficients = pd.DataFrame(
    data=model.coef_,
    index=model.feature_names_in_,
    columns=['Coefficient'],
)
print(coefficients)

                                   Coefficient
Pressure (bar)                       -1.671403
KPi electrolyte concentration (M)     3.233645
Current density (mA/cm2)              1.819207
Number of bubbles (-)                 0.006027
Diameter of bubbles (mm)             13.685175


In [34]:
# Tabulate the fitted coefficients of the H2 model. Row labels come from the
# feature names the estimator recorded at fit time (feature_names_in_) rather
# than from the current columns of XH2: XH2 is reassigned to a three-column
# frame further down the notebook, and labelling five coefficients with three
# column names would raise a shape-mismatch error when this cell is re-run.
coefficientsH2 = pd.DataFrame(
    data=modelH2.coef_,
    index=modelH2.feature_names_in_,
    columns=['Coefficient'],
)
print(coefficientsH2)

                                   Coefficient
Pressure (bar)                       -1.803642
KPi electrolyte concentration (M)     0.132788
Current density (mA/cm2)              0.323662
Number of bubbles (-)                 0.008315
Diameter of bubbles (mm)             -1.728666


In [35]:
# Tabulate the fitted coefficients of the O2 model. Row labels come from the
# feature names the estimator recorded at fit time (feature_names_in_) rather
# than from the current columns of XO2, so the table stays correctly labelled
# even if XO2 is rebound to another feature set after the model was fitted.
coefficientsO2 = pd.DataFrame(
    data=modelO2.coef_,
    index=modelO2.feature_names_in_,
    columns=['Coefficient'],
)
print(coefficientsO2)

                                   Coefficient
Pressure (bar)                       -1.630778
KPi electrolyte concentration (M)     8.180875
Current density (mA/cm2)              2.711347
Number of bubbles (-)                -0.001360
Diameter of bubbles (mm)             33.765008


In [None]:
# NOTE: this rebinds XH2 / yH2 to a reduced, three-feature view of the H2
# subset (bubble count and diameter are left out). modelH2 and the H2
# train/test splits created earlier were built from the five-feature XH2 and
# are not refreshed by this cell.
XH2 = H2data.loc[:, [
    'Pressure (bar)',
    'KPi electrolyte concentration (M)',
    'Current density (mA/cm2)',
]]
yH2 = H2data.loc[:, 'Optical loss (%)']