In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import os
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error
from keras.applications import ResNet50
from keras.preprocessing import image
from keras.applications.resnet50 import preprocess_input
from keras.models import Model

# Step 1: Loading Labels

label_file_path = "/content/drive/MyDrive/Oil_GVF_prediction_CNN_models/oil_water_label.csv"
labels_df = pd.read_csv(label_file_path)

# Step 2: Feature Extraction

# Load pre-trained ResNet50 model
base_model = ResNet50(weights='imagenet', include_top=False)

# Remove the last layer of ResNet50 and use the output of the second last layer as features
model = Model(inputs=base_model.input, outputs=base_model.layers[-2].output)

# Define a function to extract features from images
def extract_features(img_path):
    img = image.load_img(img_path, target_size=(224, 224))
    x = image.img_to_array(img)
    x = np.expand_dims(x, axis=0)
    x = preprocess_input(x)
    features = model.predict(x)
    return features.flatten()


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5


In [7]:
# Step 3: Data Labeling

# Merge labels with image paths
data = []
for index, row in labels_df.iterrows():
    experiment_name = row['Experiment Number']
    bg_flow = row['Background Flow (m3/h)']
    WaterCut = row['water cut (%)']
    folder_path = os.path.join("/content/drive/MyDrive/Oil_GVF_prediction_CNN_models/stft_oil_water", experiment_name)
    for img_name in os.listdir(folder_path):
        img_path = os.path.join(folder_path, img_name)
        features = extract_features(img_path)
        data.append([experiment_name, bg_flow, WaterCut, img_path] + features.tolist())




In [8]:
# Create DataFrame
columns = ['Experiment Number', 'Background Flow', 'WaterCut', 'Image Path'] + [f'feature_{i}' for i in range(100352)]
df = pd.DataFrame(data, columns=columns)

In [9]:
df.head()

Unnamed: 0,Experiment Number,Background Flow,WaterCut,Image Path,feature_0,feature_1,feature_2,feature_3,feature_4,feature_5,...,feature_100342,feature_100343,feature_100344,feature_100345,feature_100346,feature_100347,feature_100348,feature_100349,feature_100350,feature_100351
0,be22007_000,5,0,/content/drive/MyDrive/Oil_GVF_prediction_CNN_...,-1.704112,-2.407605,-1.657305,-0.571712,-3.03298,-0.431641,...,-0.314348,-5.165875,-3.773847,-2.038293,1.057652,-3.201055,0.043826,-0.798262,1.928447,4.342771
1,be22007_000,5,0,/content/drive/MyDrive/Oil_GVF_prediction_CNN_...,-2.00742,-1.857557,-1.78236,0.409304,-2.429142,-0.547343,...,1.781727,-4.8625,-2.713357,-1.649416,-0.065765,-3.518343,-0.412726,-1.092676,0.242206,3.830615
2,be22007_000,5,0,/content/drive/MyDrive/Oil_GVF_prediction_CNN_...,-1.796762,-1.993771,-1.962892,2.325828,-2.645463,1.255233,...,3.950834,-4.678708,-3.662419,-1.379766,0.623397,-3.29128,-0.203477,-1.210324,0.867371,6.686132
3,be22007_000,5,0,/content/drive/MyDrive/Oil_GVF_prediction_CNN_...,-1.737471,-1.726743,-2.082922,2.394396,-1.945735,2.245593,...,1.907041,-4.23123,-3.241199,-1.708435,0.100133,-3.730419,0.004276,-1.048185,0.479222,3.981187
4,be22007_000,5,0,/content/drive/MyDrive/Oil_GVF_prediction_CNN_...,-1.770987,-1.801598,-1.730463,0.996756,-1.956371,-0.065137,...,1.321373,-4.036424,-2.983791,-1.512003,0.127408,-3.245956,-1.111794,-1.402104,1.085548,4.464328


In [10]:
df

Unnamed: 0,Experiment Number,Background Flow,WaterCut,Image Path,feature_0,feature_1,feature_2,feature_3,feature_4,feature_5,...,feature_100342,feature_100343,feature_100344,feature_100345,feature_100346,feature_100347,feature_100348,feature_100349,feature_100350,feature_100351
0,be22007_000,5,0,/content/drive/MyDrive/Oil_GVF_prediction_CNN_...,-1.704112,-2.407605,-1.657305,-0.571712,-3.032980,-0.431641,...,-0.314348,-5.165875,-3.773847,-2.038293,1.057652,-3.201055,0.043826,-0.798262,1.928447,4.342771
1,be22007_000,5,0,/content/drive/MyDrive/Oil_GVF_prediction_CNN_...,-2.007420,-1.857557,-1.782360,0.409304,-2.429142,-0.547343,...,1.781727,-4.862500,-2.713357,-1.649416,-0.065765,-3.518343,-0.412726,-1.092676,0.242206,3.830615
2,be22007_000,5,0,/content/drive/MyDrive/Oil_GVF_prediction_CNN_...,-1.796762,-1.993771,-1.962892,2.325828,-2.645463,1.255233,...,3.950834,-4.678708,-3.662419,-1.379766,0.623397,-3.291280,-0.203477,-1.210324,0.867371,6.686132
3,be22007_000,5,0,/content/drive/MyDrive/Oil_GVF_prediction_CNN_...,-1.737471,-1.726743,-2.082922,2.394396,-1.945735,2.245593,...,1.907041,-4.231230,-3.241199,-1.708435,0.100133,-3.730419,0.004276,-1.048185,0.479222,3.981187
4,be22007_000,5,0,/content/drive/MyDrive/Oil_GVF_prediction_CNN_...,-1.770987,-1.801598,-1.730463,0.996756,-1.956371,-0.065137,...,1.321373,-4.036424,-2.983791,-1.512003,0.127408,-3.245956,-1.111794,-1.402104,1.085548,4.464328
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
126,be22008_006,30,40,/content/drive/MyDrive/Oil_GVF_prediction_CNN_...,-0.927347,-2.351417,-0.766959,-1.819230,-1.865539,-2.171415,...,-1.037999,-5.150139,-3.148723,-1.169032,2.118442,-2.985925,-0.686486,-1.770743,2.296762,3.138298
127,be22008_006,30,40,/content/drive/MyDrive/Oil_GVF_prediction_CNN_...,-1.343227,-2.112404,-0.599805,-2.109538,-2.254396,-1.685828,...,0.393618,-5.420557,-3.386472,-1.240297,0.188536,-3.319450,-0.580546,-2.002609,0.849668,0.366454
128,be22008_007,30,60,/content/drive/MyDrive/Oil_GVF_prediction_CNN_...,-1.055092,-1.794115,-1.556356,-1.918940,-2.319692,-2.037765,...,0.952077,-4.305146,-1.979046,-1.612211,1.118734,-4.054163,-1.078200,-1.836268,2.580209,2.077742
129,be22008_007,30,60,/content/drive/MyDrive/Oil_GVF_prediction_CNN_...,-1.001012,-2.081178,-1.062793,-1.344202,-1.550028,-1.977974,...,0.200619,-4.406155,-2.811883,-1.310474,2.506424,-3.725084,-0.972366,-1.288592,1.891505,4.534843


In [11]:
# Step 4: Model Training

# Separate features and labels
X = df.iloc[:, 4:]  # Features
y_bg_flow = df['Background Flow']  # Background Flow
y_WaterCut = df['WaterCut']  # Water cut

In [12]:
# Split data into train and test sets
X_train, X_test, y_train_bg_flow, y_test_bg_flow, y_train_WaterCut, y_test_WaterCut = train_test_split(X, y_bg_flow, y_WaterCut, test_size=0.2, random_state=42)


In [13]:
# Step 5: Model Selection and Training

# Random Forest Regressor for Background Flow
rf_bg_flow = RandomForestRegressor(n_estimators=100, random_state=42)
rf_bg_flow.fit(X_train, y_train_bg_flow)

# Random Forest Regressor for GVF
rf_WaterCut = RandomForestRegressor(n_estimators=100, random_state=42)
rf_WaterCut.fit(X_train, y_train_WaterCut)

In [14]:
# Step 6: Model Evaluation

# Predictions for Background Flow
y_pred_bg_flow = rf_bg_flow.predict(X_test)
mae_bg_flow = mean_absolute_error(y_test_bg_flow, y_pred_bg_flow)
print("MAE for Background Flow:", mae_bg_flow)

# Predictions for GVF
y_pred_WaterCut = rf_WaterCut.predict(X_test)
mae_WaterCut = mean_absolute_error(y_test_WaterCut, y_pred_WaterCut)
print("MAE for Water CUT:", mae_WaterCut)


MAE for Background Flow: 5.212962962962963
MAE for Water CUT: 15.420740740740738


In [19]:
import math
# Function to predict Background Flow and GVF for a single image
def predict_values_for_image(img_path):
    # Extract features from the image
    features = extract_features(img_path)

    # Reshape the features to match the input shape expected by the model
    features = features.reshape(1, -1)

    # Predict Background Flow and GVF values
    bg_flow_prediction = rf_bg_flow.predict(features)
    WaterCut_prediction = rf_WaterCut.predict(features)

    return bg_flow_prediction[0], WaterCut_prediction[0]

In [20]:

new_image_path = "/content/drive/MyDrive/Oil_GVF_prediction_CNN_models/stft_oil_water/be22007_008/21_02_2022_12_24_18_00_hdf5.png"
bg_flow_pred, WaterCut_pred = predict_values_for_image(new_image_path)

print("Predicted Background Flow:", math.ceil(bg_flow_pred))
print("Predicted GVF:", math.ceil(WaterCut_pred))


Predicted Background Flow: 8
Predicted GVF: 67


