# Case 2: User KGI 04

## Import Dependencies

In [1]:
import pandas as pd
import os

## Data Loading

In [2]:
# Folder that holds the input CSVs, followed by the file names read from it.
DATA_PATH = "./Dataset/"
CSV_FILES = [
    "15_9-F-1 A.csv",
    "15_9-F-1 B.csv",
    "15_9-F-1 C.csv",
    "15_9-F-11 A.csv",
    "15_9-F-11 B.csv",
]

def load_data(file_paths, data_path=None):
    """Read several CSV files and stack them into a single DataFrame.

    Each entry of ``file_paths`` is joined onto ``data_path`` and read with
    ``pd.read_csv``. A ``WELL`` column holding the file's base name without its
    extension is added to every frame, and the frames are then concatenated
    row-wise with a fresh integer index.

    Files that are missing or that fail to load are reported with ``print``
    and skipped, so one bad file does not abort the whole load.

    Parameters
    ----------
    file_paths : iterable of str
        File names relative to ``data_path``.
    data_path : str, optional
        Directory containing the files. When omitted, the module-level
        ``DATA_PATH`` is used.

    Returns
    -------
    pandas.DataFrame
        Concatenation of every file that loaded successfully.

    Raises
    ------
    ValueError
        If none of the files could be loaded.
    """
    if data_path is None:
        data_path = DATA_PATH

    all_data = []

    for file_path in file_paths:
        try:
            full_path = os.path.join(data_path, file_path)
            frame = pd.read_csv(full_path)

            # splitext strips only the final extension, so a dotted name such
            # as "F-1.A.csv" is labelled "F-1.A" instead of being cut to "F-1".
            well_name = os.path.splitext(os.path.basename(file_path))[0]
            frame['WELL'] = well_name

            all_data.append(frame)
            print(f"Loaded {file_path} successfully")
        except FileNotFoundError:
            print(f"Warning: File {file_path} not found. Skipping.")
        except Exception as e:
            # Deliberately best-effort: report the failure and keep loading the rest.
            print(f"Error loading {file_path}: {str(e)}")

    # pd.concat rejects an empty list with an unhelpful message; fail with a
    # clear one (same exception type) when nothing was loaded.
    if not all_data:
        raise ValueError(
            f"No files could be loaded from {data_path!r}; nothing to combine."
        )

    combined_df = pd.concat(all_data, ignore_index=True)
    print(f"Combined dataset shape: {combined_df.shape}")
    return combined_df

# Full path of every CSV (DATA_PATH + file name). load_data prefixes DATA_PATH
# on its own, so it is handed the bare names in CSV_FILES rather than this list.
file_paths = [os.path.join(DATA_PATH, csv_name) for csv_name in CSV_FILES]
df = load_data(file_paths=CSV_FILES)

Loaded 15_9-F-1 A.csv successfully
Loaded 15_9-F-1 B.csv successfully
Loaded 15_9-F-1 C.csv successfully
Loaded 15_9-F-11 A.csv successfully
Loaded 15_9-F-11 B.csv successfully
Combined dataset shape: (33673, 9)


## Simple Data Exploration

In [4]:
# First look at the combined frame: its size, a few rows, summary statistics,
# and how much of each column is missing.
print("\n--- Data Exploration ---")
print(f"Dataset Shape: {df.shape}")
print("First 5 rows:", df.head(), sep="\n")

print("\nBasic Statistics:", df.describe(), sep="\n")

# Null count per column, then the same counts as a percentage of all rows.
missing_values = df.isna().sum()
row_count = len(df)
print("\nMissing Values:", missing_values, sep="\n")
print("\nMissing Values Percentage:")
print(missing_values / row_count * 100)


--- Data Exploration ---
Dataset Shape: (33673, 9)
First 5 rows:
    DEPTH    NPHI    RHOB      GR       RT     PEF    CALI       DT  \
0  3100.0  0.0481  2.6067  8.3222  10.1810  7.5681  8.5781  60.6463   
1  3100.1  0.0481  2.6061  8.5168  10.0395  7.5860  8.5781  60.6713   
2  3100.2  0.0483  2.6063  8.3134   9.9846  7.6333  8.5781  60.7070   
3  3100.3  0.0486  2.6078  8.6389   9.9122  7.6751  8.5781  60.7455   
4  3100.4  0.0488  2.6091  8.2602   9.8524  7.6777  8.5781  60.7840   

         WELL  
0  15_9-F-1 A  
1  15_9-F-1 A  
2  15_9-F-1 A  
3  15_9-F-1 A  
4  15_9-F-1 A  

Basic Statistics:
              DEPTH          NPHI          RHOB            GR            RT  \
count  33673.000000  22814.000000  22870.000000  33489.000000  33209.000000   
mean    3225.772028      0.183379      2.461276     41.988215    429.736408   
std      287.753501      0.084425      0.148896     50.460093   5097.033177   
min     2600.000000      0.042000      2.033000      0.969100      0.065000 