In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report
import requests
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier

# --- Load data -------------------------------------------------------------
# Three CSV recordings are read separately and then stacked into one frame.
data1 = pd.read_csv('data_well.csv')
data2 = pd.read_csv('data_well2.csv')
data3 = pd.read_csv('data_well3.csv')

# Combine the three DataFrames under a fresh 0..n-1 index
data = pd.concat([data1, data2, data3], ignore_index=True)

# Lag the failure flag by one row and drop rows where the lag is missing
# (at minimum the very first row, which has no predecessor).
# NOTE(review): 'previous_failure' is removed from X below, so its only
# effect on the model is this row filter -- confirm that is intended.
data = (
    data
    .assign(previous_failure=data['failure'].shift(1))
    .dropna(subset=['previous_failure'])
)

# --- Features and target ---------------------------------------------------
# Everything that is NOT listed here is fed to the model as a feature.
non_feature_columns = [
    'failure', 'sessionID', 'score', 'turn', 'previous_failure', 'team',
    'Cuttings', 'Precipitation', 'MUD_Density_Rating', 'Permeability_Rating',
    'Rotary_Speed', 'Rotary_Torque',
]
X = data.drop(columns=non_feature_columns)
y = data['failure']

# One-hot encode the categorical column; standardise every remaining column.
categorical_features = ['Casing Integrity']
numerical_features = X.columns.difference(categorical_features)

numeric_step = ('num', StandardScaler(), numerical_features)
categorical_step = ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_features)
preprocessor = ColumnTransformer(transformers=[numeric_step, categorical_step])

# Weight the True class more heavily than the False class.
class_weights = {False: 1, True: 1.8}

# --- Model -----------------------------------------------------------------
# Preprocessing followed by a support-vector classifier.
# Alternative final steps that can be swapped in:
#     ('classifier', KNeighborsClassifier())
#     ('classifier', LogisticRegression(class_weight=class_weights))
classifier = SVC(class_weight=class_weights)
pipeline = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('classifier', classifier),
])

# Hold out 20% of the rows for evaluation; fixed seed for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=0
)

# Fit on the training split, then predict the held-out split.
pipeline.fit(X_train, y_train)
y_pred = pipeline.predict(X_test)

# --- Evaluation ------------------------------------------------------------
accuracy = accuracy_score(y_test, y_pred)
print("SVC model accuracy (in %):", accuracy * 100)
# print("\nClassification Report:\n", classification_report(y_test, y_pred))

SVC model accuracy (in %): 78.20738137082601

Classification Report:
               precision    recall  f1-score   support

       False       0.78      1.00      0.88       445
        True       0.00      0.00      0.00       124

    accuracy                           0.78       569
   macro avg       0.39      0.50      0.44       569
weighted avg       0.61      0.78      0.69       569



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [2]:

# Feature names selected (in this order) from each live observation before it
# is handed to the model. Must stay a list: it is used to index a DataFrame.
X_columns = [
    "Bit_Temperature",
    "Depth_Temperature",
    "Depth_of_Bit",
    "MUD_Circulation",
    "MUD_Density",
    "Permeability",
    "Porosity",
    "Casing Integrity",
    "Pressure",
    "Weight_on_Bit",
]

def update_class_weights_and_retrain(pipeline, X_train, y_train, iteration,
                                     step=500, base_weight=1.7, weight_step=1.05):
    """Fit a new logistic-regression pipeline with a heavier ``True`` class weight.

    The weight of the ``True`` class is
    ``base_weight + (iteration // step) * weight_step``; the ``False`` class
    keeps weight 1. With the default arguments this is the same schedule the
    function has always used (1.7, then +1.05 for every full 500 iterations).

    Parameters
    ----------
    pipeline : object
        Not read by this function; accepted so existing call sites that pass
        the currently active pipeline keep working.
    X_train, y_train
        Training features and labels passed straight to ``Pipeline.fit``.
    iteration : int
        Current loop iteration; only ``iteration // step`` influences the weight.
    step : int, optional
        Number of iterations per weight increase. Must be positive.
    base_weight : float, optional
        ``True`` class weight before any increase.
    weight_step : float, optional
        Amount added to the ``True`` class weight per completed ``step``.

    Returns
    -------
    sklearn.pipeline.Pipeline
        A newly fitted pipeline (preprocessor + ``LogisticRegression``).

    Raises
    ------
    ValueError
        If ``step`` is not positive.
    """
    # Local import keeps this block self-contained; sklearn is already a
    # dependency of the notebook.
    from sklearn.base import clone

    if step <= 0:
        raise ValueError("step must be a positive number of iterations")

    # Increase weight for 'True' class every `step` iterations
    weight_increment = (iteration // step) * weight_step
    class_weights = {False: 1, True: base_weight + weight_increment}

    # Pipeline does not copy its steps, so passing the module-level
    # `preprocessor` directly would refit the very object that the currently
    # active pipeline still uses. Fit an unfitted clone instead.
    updated_pipeline = Pipeline(steps=[
        ('preprocessor', clone(preprocessor)),
        # ('classifier', SVC(class_weight=class_weights))
        ('classifier', LogisticRegression(class_weight=class_weights))
    ])

    # Retrain the model
    updated_pipeline.fit(X_train, y_train)
    return updated_pipeline


# Function to make decision based on new data
def make_decision(new_data, pipeline, columns=None):
    """Ask the model whether to repair or keep drilling for one observation.

    Parameters
    ----------
    new_data : dict
        Feature name -> value for a single observation. Every name in
        ``columns`` must be present; extra keys are ignored.
    pipeline : object
        Fitted estimator exposing ``predict`` that accepts a one-row DataFrame.
    columns : list of str, optional
        Feature names to select, in the order they are handed to the model.
        Defaults to the module-level ``X_columns``.

    Returns
    -------
    str
        ``"Repair"`` when the first prediction is truthy, otherwise ``"Drill"``.

    Raises
    ------
    KeyError
        If ``new_data`` lacks one of the requested columns.
    """
    # Resolve the default lazily so callers that pass `columns` do not depend
    # on the module-level list at all.
    feature_order = list(X_columns if columns is None else columns)

    # Convert new data to a one-row DataFrame restricted to the model's features
    new_data_df = pd.DataFrame([new_data])[feature_order]

    # Predict using the model
    prediction = pipeline.predict(new_data_df)

    # Raw prediction is echoed so it shows up in the run transcript.
    print(prediction[0])

    # A truthy prediction means the model expects a failure -> repair first.
    return "Repair" if prediction[0] else "Drill"

# Session management and API interaction
BASE_URL = "http://10.68.195.110:8000/well"
START_URL = f"{BASE_URL}/start/4"  # trailing "4" is sent as-is; its meaning is defined by the server
MAX_ITERATIONS = 950     # number of drill/repair requests issued per session
RETRAIN_INTERVAL = 500   # retrain with heavier class weights every N iterations
ACTION_TIMEOUT = 3       # seconds, for drill / repair / current requests
SESSION_TIMEOUT = 10     # seconds, for the start and end requests


def _extract_features(conditions):
    """Return the subset of ``conditions`` used as model input (the ``X_columns`` keys).

    Raises ``KeyError`` if any of those keys is missing.
    """
    return {name: conditions[name] for name in X_columns}


def _request_state(action, session_id):
    """GET ``/well/<action>/<session_id>`` and return the decoded JSON body.

    If the request times out, one retry is made against ``/well/current/``
    for the same session. Returns ``None`` when the request (or the fallback)
    fails with a ``requests.RequestException``; the failure is printed.
    """
    url = f"{BASE_URL}/{action}/{session_id}"
    print(f"Requesting URL: {url}")
    try:
        response = requests.get(url, timeout=ACTION_TIMEOUT)
        response.raise_for_status()
        return response.json()
    except requests.Timeout as e:
        # Timeout must be handled before the generic RequestException branch,
        # because it is a subclass of it.
        print(f"Timeout occurred: {e}. Trying fallback URL.")
        try:
            fallback_url = f"{BASE_URL}/current/{session_id}"
            response = requests.get(fallback_url, timeout=ACTION_TIMEOUT)
            response.raise_for_status()
            return response.json()
        except requests.RequestException as fallback_e:
            print(f"Fallback request failed: {fallback_e}.")
            return None
    except requests.RequestException as e:
        print(f"Request failed: {e}.")
        return None


session_id = None

try:
    # Start the well and get the session ID. A timeout is set so a silent
    # server cannot hang the kernel indefinitely.
    response = requests.get(START_URL, timeout=SESSION_TIMEOUT)
    response.raise_for_status()
    data = response.json()
    session_id = data.get('sessionID')

    print("Sessionid ; ", session_id)

    if not session_id:
        raise ValueError("Session ID not found in the response")

    print(f"Started well with session ID: {session_id}")

    new_data = _extract_features(data['conditions'])

    # Initialize the count
    count = 0

    # No try/except around the loop body: every request made inside it is
    # already guarded in _request_state, so a RequestException cannot escape.
    while count < MAX_ITERATIONS:
        if count % RETRAIN_INTERVAL == 0 and count != 0:
            print(f"Updating class weights and retraining model at iteration {count}")
            pipeline = update_class_weights_and_retrain(pipeline, X_train, y_train, count)

        # Repair unconditionally when the last request failed (data is None)
        # or the last response reported a failure; otherwise ask the model.
        if not data or data['failure'] == True:
            decision = "Repair"
        else:
            decision = make_decision(new_data, pipeline)

        # Call the endpoint that matches the decision
        data = _request_state("drill" if decision == "Drill" else "repair", session_id)

        # Refresh the observation only when a response was received; after a
        # failed request the previous observation is kept.
        if data:
            new_data = _extract_features(data['conditions'])
            score = data.get('score', 'N/A')
            turn = data.get('turn', 'N/A')
            print(f"Decision based on new data: {decision}, Score is: {score} Turn is: {turn}")

        # Increment the count
        count += 1

finally:
    # Always try to close the session, even if the loop raised.
    if session_id:
        try:
            # End the well process using the session ID
            end_url = f"{BASE_URL}/end/{session_id}"
            print(f"Ending URL: {end_url}")
            end_response = requests.get(end_url, timeout=SESSION_TIMEOUT)
            end_response.raise_for_status()
            print(f"Ended well with session ID: {session_id}")
        except requests.RequestException as e:
            print(f"Error during ending the well: {e}")


Sessionid ;  g0L8SWwgXSZKfRM_5Ldk_uzWoLE
Started well with session ID: g0L8SWwgXSZKfRM_5Ldk_uzWoLE
False
Requesting URL: http://10.68.195.110:8000/well/drill/g0L8SWwgXSZKfRM_5Ldk_uzWoLE
Decision based on new data: Drill, Score is: 1 Turn is: 1
False
Requesting URL: http://10.68.195.110:8000/well/drill/g0L8SWwgXSZKfRM_5Ldk_uzWoLE
Decision based on new data: Drill, Score is: 2 Turn is: 2
False
Requesting URL: http://10.68.195.110:8000/well/drill/g0L8SWwgXSZKfRM_5Ldk_uzWoLE
Decision based on new data: Drill, Score is: 3 Turn is: 3
False
Requesting URL: http://10.68.195.110:8000/well/drill/g0L8SWwgXSZKfRM_5Ldk_uzWoLE
Decision based on new data: Drill, Score is: 4 Turn is: 4
False
Requesting URL: http://10.68.195.110:8000/well/drill/g0L8SWwgXSZKfRM_5Ldk_uzWoLE
Decision based on new data: Drill, Score is: 5 Turn is: 5
False
Requesting URL: http://10.68.195.110:8000/well/drill/g0L8SWwgXSZKfRM_5Ldk_uzWoLE
Decision based on new data: Drill, Score is: -5 Turn is: 6
Requesting URL: http://10.68.1