In [19]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [20]:
import os
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error
from keras.applications import ResNet50
from keras.preprocessing import image
from keras.applications.resnet50 import preprocess_input
from keras.models import Model

# Step 1: Loading Labels

label_file_path = "/content/drive/MyDrive/Oil_GVF_prediction_CNN_models/gas_label.csv"
labels_df = pd.read_csv(label_file_path)

# Step 2: Feature Extraction

# Load pre-trained ResNet50 model
base_model = ResNet50(weights='imagenet', include_top=False)

# Remove the last layer of ResNet50 and use the output of the second last layer as features
model = Model(inputs=base_model.input, outputs=base_model.layers[-2].output)

# Define a function to extract features from images
def extract_features(img_path):
    img = image.load_img(img_path, target_size=(224, 224))
    x = image.img_to_array(img)
    x = np.expand_dims(x, axis=0)
    x = preprocess_input(x)
    features = model.predict(x)
    return features.flatten()


In [21]:
# Step 3: Data Labeling

# Merge labels with image paths
data = []
for index, row in labels_df.iterrows():
    experiment_name = row['Experiment Number']
    bg_flow = row['Background Flow (m3/h)']
    gvf = row['GVF(%)']
    folder_path = os.path.join("/content/drive/MyDrive/Oil_GVF_prediction_CNN_models/STFT_domain_Oil_gas_D_top_left_2_sec_img", experiment_name)
    for img_name in os.listdir(folder_path):
        img_path = os.path.join(folder_path, img_name)
        features = extract_features(img_path)
        data.append([experiment_name, bg_flow, gvf, img_path] + features.tolist())




In [22]:
# Create DataFrame
columns = ['Experiment Number', 'Background Flow', 'GVF', 'Image Path'] + [f'feature_{i}' for i in range(100352)]
df = pd.DataFrame(data, columns=columns)

In [23]:
df.head()

Unnamed: 0,Experiment Number,Background Flow,GVF,Image Path,feature_0,feature_1,feature_2,feature_3,feature_4,feature_5,...,feature_100342,feature_100343,feature_100344,feature_100345,feature_100346,feature_100347,feature_100348,feature_100349,feature_100350,feature_100351
0,be22003_028,5,0,/content/drive/MyDrive/Oil_GVF_prediction_CNN_...,-0.99979,0.506254,-0.317241,0.405707,-2.478089,-0.729442,...,-1.402889,-3.023046,-2.648395,-2.30293,-2.35374,-3.826228,1.095279,-2.061479,-2.652443,-2.299287
1,be22003_028,5,0,/content/drive/MyDrive/Oil_GVF_prediction_CNN_...,-0.634048,-0.577673,-0.706749,-0.495804,-3.088602,-2.258239,...,-1.336763,-3.390615,-2.751126,-2.094326,-2.598576,-3.662185,0.857412,-1.572023,-2.694276,-2.330959
2,be22003_028,5,0,/content/drive/MyDrive/Oil_GVF_prediction_CNN_...,-0.512231,0.083559,-0.614701,0.06714,-2.390521,-1.768111,...,-1.348452,-2.647274,-2.565055,-2.916632,-1.537546,-2.567406,-2.262771,-2.384289,-2.374598,-2.164381
3,be22003_028,5,0,/content/drive/MyDrive/Oil_GVF_prediction_CNN_...,-1.33798,-1.280775,-0.883589,-0.427804,-2.393467,-0.26355,...,-1.667314,-2.709171,-2.772267,-2.678916,-1.360643,-2.392461,-2.221828,-2.624086,-2.523921,-2.126632
4,be22003_028,5,0,/content/drive/MyDrive/Oil_GVF_prediction_CNN_...,-0.945582,-1.645193,-0.836164,-0.353569,-2.618023,-0.779637,...,-1.329391,-3.192986,-2.886902,-2.920386,-1.821085,-3.230479,-1.80298,-2.093008,-2.276181,-1.736622


In [24]:
# Step 4: Model Training

# Separate features and labels
X = df.iloc[:, 4:]  # Features
y_bg_flow = df['Background Flow']  # Background Flow
y_gvf = df['GVF']  # GVF

In [25]:
# Split data into train and test sets
X_train, X_test, y_train_bg_flow, y_test_bg_flow, y_train_gvf, y_test_gvf = train_test_split(X, y_bg_flow, y_gvf, test_size=0.2, random_state=42)


In [26]:
# Step 5: Model Selection and Training

# Random Forest Regressor for Background Flow
rf_bg_flow = RandomForestRegressor(n_estimators=100, random_state=42)
rf_bg_flow.fit(X_train, y_train_bg_flow)

# Random Forest Regressor for GVF
rf_gvf = RandomForestRegressor(n_estimators=100, random_state=42)
rf_gvf.fit(X_train, y_train_gvf)

In [27]:
# Step 6: Model Evaluation

# Predictions for Background Flow
y_pred_bg_flow = rf_bg_flow.predict(X_test)
mae_bg_flow = mean_absolute_error(y_test_bg_flow, y_pred_bg_flow)
print("MAE for Background Flow:", mae_bg_flow)

# Predictions for GVF
y_pred_gvf = rf_gvf.predict(X_test)
mae_gvf = mean_absolute_error(y_test_gvf, y_pred_gvf)
print("MAE for GVF:", mae_gvf)


MAE for Background Flow: 10.213068181818182
MAE for GVF: 13.64693181818182


In [28]:
import math
# Function to predict Background Flow and GVF for a single image
def predict_values_for_image(img_path):
    # Extract features from the image
    features = extract_features(img_path)

    # Reshape the features to match the input shape expected by the model
    features = features.reshape(1, -1)

    # Predict Background Flow and GVF values
    bg_flow_prediction = rf_bg_flow.predict(features)
    gvf_prediction = rf_gvf.predict(features)

    return bg_flow_prediction[0], gvf_prediction[0]

In [33]:

new_image_path = "/content/drive/MyDrive/Oil_GVF_prediction_CNN_models/STFT_domain_Oil_gas_D_top_left_2_sec_img/be22003_028/15_02_2022_15_23_50_00_hdf5_cropped_1.png"
bg_flow_pred, gvf_pred = predict_values_for_image(new_image_path)

print("Predicted Background Flow:", math.ceil(bg_flow_pred))
print("Predicted GVF:", math.ceil(gvf_pred))


Predicted Background Flow: 12
Predicted GVF: 2


