In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [5]:
import os
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error
from keras.applications import ResNet50
from keras.preprocessing import image
from keras.applications.resnet50 import preprocess_input
from keras.models import Model

# Step 1: Loading Labels

label_file_path = "/content/drive/MyDrive/Oil_GVF_prediction_CNN_models/gas_label.csv"
labels_df = pd.read_csv(label_file_path)

# Step 2: Feature Extraction

# Load pre-trained ResNet50 model
base_model = ResNet50(weights='imagenet', include_top=False)

# Remove the last layer of ResNet50 and use the output of the second last layer as features
model = Model(inputs=base_model.input, outputs=base_model.layers[-2].output)

# Define a function to extract features from images
def extract_features(img_path):
    img = image.load_img(img_path, target_size=(224, 224))
    x = image.img_to_array(img)
    x = np.expand_dims(x, axis=0)
    x = preprocess_input(x)
    features = model.predict(x)
    return features.flatten()


In [6]:
# Step 3: Data Labeling

# Merge labels with image paths
data = []
for index, row in labels_df.iterrows():
    experiment_name = row['Experiment Number']
    bg_flow = row['Background Flow (m3/h)']
    gvf = row['GVF(%)']
    folder_path = os.path.join("/content/drive/MyDrive/Oil_GVF_prediction_CNN_models/STFT_domain_Oil_gas_D_top_left", experiment_name)
    for img_name in os.listdir(folder_path):
        img_path = os.path.join(folder_path, img_name)
        features = extract_features(img_path)
        data.append([experiment_name, bg_flow, gvf, img_path] + features.tolist())




In [7]:
# Create DataFrame
columns = ['Experiment Number', 'Background Flow', 'GVF', 'Image Path'] + [f'feature_{i}' for i in range(100352)]
df = pd.DataFrame(data, columns=columns)

In [8]:
df.head()

Unnamed: 0,Experiment Number,Background Flow,GVF,Image Path,feature_0,feature_1,feature_2,feature_3,feature_4,feature_5,...,feature_100342,feature_100343,feature_100344,feature_100345,feature_100346,feature_100347,feature_100348,feature_100349,feature_100350,feature_100351
0,be22003_028,5,0,/content/drive/MyDrive/Oil_GVF_prediction_CNN_...,0.132587,1.376482,-0.934627,-0.17877,-2.135124,-0.374624,...,-1.118556,-2.986799,-2.521429,-2.957002,-1.524404,-3.209066,-1.641731,-2.349093,-2.410657,-2.298179
1,be22003_028,5,0,/content/drive/MyDrive/Oil_GVF_prediction_CNN_...,-0.095558,-0.121811,-1.48539,-0.435379,-3.471271,-0.523385,...,-0.938868,-2.833332,-2.70369,-2.402027,-2.351091,-4.215418,0.056732,-2.008089,-2.557273,-1.90927
2,be22003_028,5,0,/content/drive/MyDrive/Oil_GVF_prediction_CNN_...,0.028993,0.42661,-1.060178,-0.86583,-2.903509,-2.744921,...,-0.910131,-2.697102,-2.136266,-2.249949,-2.059119,-4.283205,-0.015953,-2.297694,-2.547785,-2.040889
3,be22003_028,5,0,/content/drive/MyDrive/Oil_GVF_prediction_CNN_...,-0.108336,1.139142,-0.360604,-0.583174,-3.333948,-2.12285,...,-0.999915,-3.248482,-2.340247,-2.837412,-1.970479,-3.074052,-1.404666,-1.904213,-2.45937,-2.396137
4,be22003_028,5,0,/content/drive/MyDrive/Oil_GVF_prediction_CNN_...,-0.380471,1.376995,-0.724795,0.496344,-2.970712,-0.611361,...,-1.001638,-2.864774,-2.748161,-2.316119,-2.21664,-3.737882,-0.284259,-2.149905,-2.352108,-2.064563


In [10]:
# Step 4: Model Training

# Separate features and labels
X = df.iloc[:, 4:]  # Features
y_bg_flow = df['Background Flow']  # Background Flow
y_gvf = df['GVF']  # GVF

In [11]:
# Split data into train and test sets
X_train, X_test, y_train_bg_flow, y_test_bg_flow, y_train_gvf, y_test_gvf = train_test_split(X, y_bg_flow, y_gvf, test_size=0.2, random_state=42)


In [12]:
# Step 5: Model Selection and Training

# Random Forest Regressor for Background Flow
rf_bg_flow = RandomForestRegressor(n_estimators=100, random_state=42)
rf_bg_flow.fit(X_train, y_train_bg_flow)

# Random Forest Regressor for GVF
rf_gvf = RandomForestRegressor(n_estimators=100, random_state=42)
rf_gvf.fit(X_train, y_train_gvf)

In [13]:
# Step 6: Model Evaluation

# Predictions for Background Flow
y_pred_bg_flow = rf_bg_flow.predict(X_test)
mae_bg_flow = mean_absolute_error(y_test_bg_flow, y_pred_bg_flow)
print("MAE for Background Flow:", mae_bg_flow)

# Predictions for GVF
y_pred_gvf = rf_gvf.predict(X_test)
mae_gvf = mean_absolute_error(y_test_gvf, y_pred_gvf)
print("MAE for GVF:", mae_gvf)


MAE for Background Flow: 7.907894736842105
MAE for GVF: 12.91263157894737


In [14]:
import math
# Function to predict Background Flow and GVF for a single image
def predict_values_for_image(img_path):
    # Extract features from the image
    features = extract_features(img_path)

    # Reshape the features to match the input shape expected by the model
    features = features.reshape(1, -1)

    # Predict Background Flow and GVF values
    bg_flow_prediction = rf_bg_flow.predict(features)
    gvf_prediction = rf_gvf.predict(features)

    return bg_flow_prediction[0], gvf_prediction[0]

In [17]:

new_image_path = "/content/drive/MyDrive/Oil_GVF_prediction_CNN_models/STFT_domain_Oil_gas_D_top_left/be22004_016/16_02_2022_12_36_51_00_hdf5.png"
bg_flow_pred, gvf_pred = predict_values_for_image(new_image_path)

print("Predicted Background Flow:", math.ceil(bg_flow_pred))
print("Predicted GVF:", math.ceil(gvf_pred))


Predicted Background Flow: 23
Predicted GVF: 26


