In [84]:
import pandas as pd
import os
import plotly.graph_objects as go
import numpy as np

# Well metadata table (one row per well)
well_loc = pd.read_csv('well-loc.tsv', sep='\t')

# Directory that holds one TSV log per well
sensor_data_path = 'sensor-data'

# Collect the TSV logs, ordered by the integer prefix of each file name
sensor_data_files = sorted(
    (name for name in os.listdir(sensor_data_path) if name.endswith('.tsv')),
    key=lambda name: int(name.split('.')[0]),
)

# Read every log into its own DataFrame; the "-9999" marker is parsed as NaN
sensor_data_list = []
for file_name in sensor_data_files:
    log_path = os.path.join(sensor_data_path, file_name)
    sensor_data_list.append(pd.read_csv(log_path, sep='\t', na_values="-9999"))

# Discard every row that contains a missing value (original row labels are kept)
well_loc = well_loc.dropna()
sensor_data_list = [log.dropna() for log in sensor_data_list]

# Rename the well-name column to 'Well'
well_loc = well_loc.rename(columns={'井': 'Well'})

# Give every log the same three column labels
for log in sensor_data_list:
    log.columns = ['Depth', 'Porosity', 'Hydrate Saturation']

# Quick sanity check of what was loaded
print(well_loc.head())
print(sensor_data_list[0].head())

  Well      X      Y
0  w01  34500  45000
1  w02  36000  45050
2  w03  37050  45020
3  w04  37880  46000
4  w05  35000  46030
         Depth  Porosity  Hydrate Saturation
112  1832.0004    0.5236              0.0237
113  1832.1528    0.5208              0.0175
114  1832.3052    0.5196              0.0000
115  1832.4576    0.5156              0.0000
116  1832.6100    0.4971              0.0000


In [86]:
# Shallowest and deepest sample over all wells (used for a common depth axis)
depth_columns = [log['Depth'] for log in sensor_data_list]
min_depth = min(depths.min() for depths in depth_columns)
max_depth = max(depths.max() for depths in depth_columns)

print(min_depth, max_depth)

# Peek at the deepest rows of the first log
print(sensor_data_list[0].tail())


1517.2 2077.0596
          Depth  Porosity  Hydrate Saturation
1716  2076.4500    0.4489                 0.0
1717  2076.6024    0.4756                 0.0
1718  2076.7548    0.4636                 0.0
1719  2076.9072    0.4680                 0.0
1720  2077.0596    0.4673                 0.0


In [87]:
# Fraction (0..1) of rows in each log whose porosity or hydrate saturation is negative
negative_data = []
for sensor_data in sensor_data_list:
    condition = (sensor_data['Porosity'] < 0) | (sensor_data['Hydrate Saturation'] < 0)
    # The mean of a boolean mask is the share of True rows; an empty log counts as 0
    negative_data.append(float(condition.mean()) if len(condition) else 0.0)

# Bar chart of that fraction per file; title and axis labels state that it is a
# fraction, not a count
fig = go.Figure(data=[go.Bar(x=sensor_data_files, y=negative_data)])
fig.update_layout(
    title_text='Fraction of negative data points in each sensor data file',
    xaxis_title='Sensor data file',
    yaxis_title='Fraction of rows with a negative value',
)
fig.show()


In [88]:
def estimate_resource(sensor_data: pd.Series, valid_volume: float = 1, factor: float = 155) -> float:
    """Estimate the resource at one depth sample from its sensor readings.

    The estimate is the plain product
    ``valid_volume * Porosity * Hydrate Saturation * factor``.

    Args:
        sensor_data: Mapping-like record (e.g. one DataFrame row) exposing the
            keys ``'Porosity'`` and ``'Hydrate Saturation'``.
        valid_volume: Volume the estimate refers to; defaults to the assumed
            1 cubic meter.
        factor: Multiplier applied to the product; defaults to the assumed
            value 155.

    Returns:
        The estimated resource for the given record.

    Raises:
        KeyError: If either required key is missing from ``sensor_data``.
    """
    porosity = sensor_data['Porosity']
    hydrate_saturation = sensor_data['Hydrate Saturation']

    # Multiplication order is kept as-is so floating-point results do not change
    return valid_volume * porosity * hydrate_saturation * factor

In [89]:
esti_data_list: list[pd.DataFrame] = []

# Build one estimate table per well, keeping the row labels of the source log.
# estimate_resource only indexes by column name and multiplies, so it can be
# applied to the whole frame at once instead of row by row via apply(axis=1);
# the per-row values are the same and an empty log simply yields an empty table.
for sensor_data in sensor_data_list:
    estimation = pd.DataFrame({
        'Estimated Resources': estimate_resource(sensor_data),
        'Depth': sensor_data['Depth'],
    })
    esti_data_list.append(estimation)

print(esti_data_list[0])

      Estimated Resources      Depth
112              1.923445  1832.0004
113              1.412670  1832.1528
114              0.000000  1832.3052
115              0.000000  1832.4576
116              0.000000  1832.6100
...                   ...        ...
1716             0.000000  2076.4500
1717             0.000000  2076.6024
1718             0.000000  2076.7548
1719             0.000000  2076.9072
1720             0.000000  2077.0596

[1609 rows x 2 columns]


In [90]:
# Shared colour limits: without them every trace auto-scales its own colours,
# so the same colour would stand for different estimates in different wells.
# default=None leaves the limits unset when there is no data at all.
resource_min = min((df['Estimated Resources'].min() for df in esti_data_list), default=None)
resource_max = max((df['Estimated Resources'].max() for df in esti_data_list), default=None)

# Create a 3D scatter plot: one vertical column of points per well
fig = go.Figure()

for idx, df in enumerate(esti_data_list):
    # Wells and estimate tables are matched by position
    well = well_loc.iloc[idx]
    well_name = well['Well']
    x_coord = well['X']
    y_coord = well['Y']

    fig.add_trace(go.Scatter3d(
        x=[x_coord] * len(df),  # Repeat the X coordinate for each depth
        y=[y_coord] * len(df),  # Repeat the Y coordinate for each depth
        z=df['Depth'],
        mode='markers',
        marker=dict(
            size=3,
            color=df['Estimated Resources'],  # Color points by estimated resources
            colorscale='Viridis',
            cmin=resource_min,
            cmax=resource_max,
            # All traces share the same limits, so one colour bar is enough
            showscale=(idx == 0),
            colorbar=dict(title='Estimated Resources'),
            opacity=0.8
        ),
        name=f'Well {well_name}'
    ))

# Update plot layout
fig.update_layout(
    title='3D Plot of Resource Distribution by Depth',
    height=600,
    width=1200,
    scene=dict(
        xaxis_title='X Coordinate',
        yaxis_title='Y Coordinate',
        # Depth grows downwards, matching the reversed axis of the 2D profiles
        zaxis=dict(title='Depth', autorange='reversed')
    ),
    # Keep the legend on the left so it does not overlap the colour bar
    legend=dict(x=0, xanchor='left'),
)

fig.show()

In [None]:
from scipy.interpolate import interp1d


def align_index_step_size(df, new_depth_step=0.1):
    """Resample a depth-indexed table onto a regular depth grid.

    Every column other than ``'Depth'`` is linearly interpolated (and
    extrapolated at the edges) onto a grid that starts at the rounded minimum
    depth and advances by ``new_depth_step`` up to, but not including, the
    rounded maximum depth.

    Depths are rounded to at least one decimal place; finer steps (e.g. 0.25
    or 0.05) use as many decimals as the step itself needs, so grid points are
    never collapsed onto each other by rounding.

    Args:
        df: DataFrame with a numeric ``'Depth'`` column plus any number of
            numeric value columns.
        new_depth_step: Positive, finite spacing of the new depth grid.

    Returns:
        A new DataFrame with a default integer index, ``'Depth'`` as the first
        column and the interpolated value columns after it. The result is
        empty when ``df`` has fewer than two rows, because there is nothing to
        interpolate between.

    Raises:
        ValueError: If ``new_depth_step`` is not a positive finite number.
    """
    if not (new_depth_step > 0 and np.isfinite(new_depth_step)):
        raise ValueError(f"new_depth_step must be a positive finite number, got {new_depth_step!r}")

    value_columns = [column for column in df.columns if column != 'Depth']

    # Decimals needed to represent the step exactly (0.1 -> 1, 0.25 -> 2);
    # never fewer than 1, which was the previous fixed precision
    step_text = np.format_float_positional(float(new_depth_step), trim='-')
    decimals = max(1, len(step_text.partition('.')[2]))

    # interp1d needs at least two samples
    if len(df) < 2:
        return pd.DataFrame(columns=['Depth', *value_columns], dtype=float)

    # Create the new depth grid starting from the rounded minimum depth
    min_depth = np.around(df['Depth'].min(), decimals=decimals)
    max_depth = np.around(df['Depth'].max(), decimals=decimals)
    new_depths = np.around(np.arange(min_depth, max_depth, new_depth_step), decimals=decimals)

    # Interpolate every value column onto the new grid
    interpolated_data = {'Depth': new_depths}
    for column in value_columns:
        # Rounding can move the grid slightly outside the sampled depths,
        # hence extrapolation instead of a bounds error
        f = interp1d(df['Depth'], df[column], bounds_error=False, fill_value="extrapolate")
        interpolated_data[column] = f(new_depths)

    return pd.DataFrame(interpolated_data)

In [91]:
from plotly.subplots import make_subplots

# Global depth extent across all wells, used for the shared depth grid and axis
min_depth = min(df['Depth'].min() for df in esti_data_list)
max_depth = max(df['Depth'].max() for df in esti_data_list)

print(min_depth, max_depth)

# Create a uniform depth grid
uniform_depths = np.around(np.arange(min_depth, max_depth + 0.1, 0.1), decimals=1)  # Adjust step size if necessary

print(uniform_depths)

# Resample every well onto the shared grid
uniform_datasets = []
for df in esti_data_list:
    resampled = align_index_step_size(df)
    # Set Depth as index and reindex with the uniform depths, filling missing values with 0
    uniform_df = resampled.set_index('Depth').reindex(uniform_depths, fill_value=0).reset_index()
    uniform_datasets.append(uniform_df)

# Create subplots: one for each well
num_wells = len(uniform_datasets)
fig = make_subplots(rows=1, cols=num_wells, subplot_titles=[f"Well {well}" for well in well_loc['Well']])

# Add a line plot for each well
for idx, df in enumerate(uniform_datasets):
    well_name = well_loc.iloc[idx]['Well']
    fig.add_trace(
        go.Scatter(
            x=df['Estimated Resources'],
            y=df['Depth'],
            mode='lines+markers',
            name=f'Well {well_name}',
            marker=dict(size=2),
            line=dict(width=1)
        ),
        row=1, col=idx+1
    )

# Same reversed depth axis on every subplot. The range is given explicitly as
# [deepest, shallowest]; combining autorange="reversed" with range=[...] would
# leave the axis auto-ranged instead of using the given limits.
depth_range = [float(uniform_depths[-1]), float(uniform_depths[0])]
fig.update_yaxes(range=depth_range)

# Only show depth tick labels on the first and last subplot
fig.update_yaxes(title_text="Depth (m)", row=1, col=1)
for col in range(2, num_wells):
    fig.update_yaxes(showticklabels=False, row=1, col=col)
if num_wells > 1:
    fig.update_yaxes(showticklabels=True, side='right', row=1, col=num_wells)

# Update layout to better fit the subplots
fig.update_layout(
    height=600,
    width=1550,
    title_text="Resource Distribution in Depth for Each Sensor Tower",
    showlegend=False
)

fig.show()

1517.2 2077.0596
[1517.2 1517.3 1517.4 ... 2076.9 2077.  2077.1]


In [92]:

# One line trace per well; estimate tables and well rows are paired by position
well_traces = [
    go.Scatter(
        x=well_data['Depth'],
        y=well_data['Estimated Resources'],
        mode='lines+markers',
        name=f"Well {well['Well']} at ({well['X']}, {well['Y']})",
    )
    for well_data, (_, well) in zip(esti_data_list, well_loc.iterrows())
]

# Assemble the figure from the prepared traces
fig = go.Figure(data=well_traces)

# Titles for the figure, both axes and the legend
layout_options = dict(
    title="Resource Distribution by Depth across Wells",
    xaxis_title="Depth",
    yaxis_title="Estimated Resources",
    legend_title="Wells",
)
fig.update_layout(**layout_options)

# Render the figure
fig.show()