# Contents and geometry of tunnels and boreholes
This notebook concatenates tunnel data to existing borehole data to
have a unified dataframe to locate global coordinates for shearzones.

In [1]:
%load_ext autoreload
%autoreload 2
import sys
sys.path.extend([
    'C:/Users/Haakon/OneDrive/Dokumenter/FORSKNING/mastersproject/src/mastersproject',
    'C:/Users/Haakon/OneDrive/Dokumenter/FORSKNING/mastersproject/src/mastersproject/GTS'
])

# Notebook images
from IPython.display import Image

In [2]:
import GTS as gts
# from GTS.ISC_data.isc import ISCData
import numpy as np
import pandas as pd

In [3]:
cls = gts.ISCData()

# Extract shear-zone -- tunnel intersections --> global coordinates
The data on shear-zone -- tunnel intersections should be appended to the existing
borehole -- shear-zone data to compute global coordinates.

In [4]:
tn = cls.tunnel_shearzone_data()
tn.head()

Unnamed: 0,x,y,z,true_dip_direction,dip,tunnel,shearzone
0,667472.625,158925.321,1733.436,330.0,83.25,AU,11
1,667409.735,158888.36,1735.419,140.9,51.39,VE,11
2,667474.565,158935.311,1733.858,142.25,78.88,AU,12
3,667410.917,158895.617,1734.431,146.44,80.64,VE,12
4,667474.839,158943.317,1733.611,135.33,72.02,AU,13


#### Rename shear-zone for clarity 

In [5]:
def rename_sz(sz):
    """ Rename shearzone on form '12' to 'S1_2'. """
    sz = str(sz)
    sz_set = sz[0]
    num = sz[1]
    return f'S{sz_set}_{num}'

In [6]:
tn['shearzone'] = tn['shearzone'].apply(rename_sz)

In [7]:
strc = cls.borehole_structure_data().merge(cls.borehole_data(), 
                                                 how='outer', 
                                                 on='borehole',
                                                 suffixes=('_struc', '_bh'), 
                                                 validate='m:1')
strc.head()

Unnamed: 0,depth,azimuth_struc,dip,aperture,type,borehole,x,y,z,length,diameter,azimuth_bh,upward_gradient
0,2.35,169.3,57.47,334.61,Minor ductile Shear-zone,FBS1,667466.424,158888.882,1732.782,44.8,101,317.05,-31.92
1,2.99,162.65,58.83,35.7,Minor ductile Shear-zone,FBS1,667466.424,158888.882,1732.782,44.8,101,317.05,-31.92
2,3.27,172.09,58.07,0.0,Foliation,FBS1,667466.424,158888.882,1732.782,44.8,101,317.05,-31.92
3,5.02,163.86,61.26,578.25,Minor ductile Shear-zone,FBS1,667466.424,158888.882,1732.782,44.8,101,317.05,-31.92
4,6.76,169.9,66.49,0.0,Foliation,FBS1,667466.424,158888.882,1732.782,44.8,101,317.05,-31.92


### Concatenate shear-zone -- tunnel & shear-zone -- borehole data to one another
The missing columns for the sz-tunnel data should be filled with 0's. \
Then, apply the `bh_struc_to_global_coords` method. \

Note: This method will actually do nothing to the sz-tunnel data, because of all 0's.
Essentially, only GTS-localized coordinates are computed. Other values remain the same / are set to 0.

In [8]:
strc.columns

Index(['depth', 'azimuth_struc', 'dip', 'aperture', 'type', 'borehole', 'x',
       'y', 'z', 'length', 'diameter', 'azimuth_bh', 'upward_gradient'],
      dtype='object')

In [9]:
{s: 0 for s in strc}

{'depth': 0,
 'azimuth_struc': 0,
 'dip': 0,
 'aperture': 0,
 'type': 0,
 'borehole': 0,
 'x': 0,
 'y': 0,
 'z': 0,
 'length': 0,
 'diameter': 0,
 'azimuth_bh': 0,
 'upward_gradient': 0}

In [10]:
df = pd.concat([
    strc,
    tn.rename(columns={'true_dip_direction': 'azimuth_struc', 
                       'tunnel': 'borehole', 
#                        'shearzone': 'type',
                      })
],
ignore_index=True,
sort=False).fillna(value={
    **{s: 0 for s in strc},  # All columns in strc: np.NaN --> 0    
})
df.tail(13).head(6)

Unnamed: 0,depth,azimuth_struc,dip,aperture,type,borehole,x,y,z,length,diameter,azimuth_bh,upward_gradient,shearzone
588,36.31,168.6,64.36,0.0,Fracture,GEO4,667470.776,158912.0,1732.618,40.05,86.0,270.03,-34.22,
589,38.32,160.06,74.77,0.0,Fracture,GEO4,667470.776,158912.0,1732.618,40.05,86.0,270.03,-34.22,
590,39.05,180.71,64.95,0.0,Fracture,GEO4,667470.776,158912.0,1732.618,40.05,86.0,270.03,-34.22,
591,0.0,330.0,83.25,0.0,0,AU,667472.625,158925.321,1733.436,0.0,0.0,0.0,0.0,S1_1
592,0.0,140.9,51.39,0.0,0,VE,667409.735,158888.36,1735.419,0.0,0.0,0.0,0.0,S1_1
593,0.0,142.25,78.88,0.0,0,AU,667474.565,158935.311,1733.858,0.0,0.0,0.0,0.0,S1_2


In [11]:
mapping = {'x': 'x', 'y': 'y', 'z': 'z', 'depth': 'depth', 
           'upward_gradient': 'upward_gradient', 'azimuth': 'azimuth_bh'}
cls.bh_struc_to_global_coords(df, **mapping)
df.tail(13).head(6)

Unnamed: 0,depth,azimuth_struc,dip,aperture,type,borehole,x,y,z,length,...,shearzone,_trig_x,_trig_y,_trig_z,x_swiss,y_swiss,z_swiss,x_gts,y_gts,z_gts
588,36.31,168.6,64.36,0.0,Fracture,GEO4,667470.776,158912.0,1732.618,40.05,...,,-0.826884,0.000433,-0.562372,667440.751834,158912.015721,1712.198271,40.751834,112.015721,12.198271
589,38.32,160.06,74.77,0.0,Fracture,GEO4,667470.776,158912.0,1732.618,40.05,...,,-0.826884,0.000433,-0.562372,667439.089797,158912.016591,1711.067903,39.089797,112.016591,11.067903
590,39.05,180.71,64.95,0.0,Fracture,GEO4,667470.776,158912.0,1732.618,40.05,...,,-0.826884,0.000433,-0.562372,667438.486172,158912.016907,1710.657371,38.486172,112.016907,10.657371
591,0.0,330.0,83.25,0.0,0,AU,667472.625,158925.321,1733.436,0.0,...,S1_1,0.0,1.0,0.0,667472.625,158925.321,1733.436,72.625,125.321,33.436
592,0.0,140.9,51.39,0.0,0,VE,667409.735,158888.36,1735.419,0.0,...,S1_1,0.0,1.0,0.0,667409.735,158888.36,1735.419,9.735,88.36,35.419
593,0.0,142.25,78.88,0.0,0,AU,667474.565,158935.311,1733.858,0.0,...,S1_2,0.0,1.0,0.0,667474.565,158935.311,1733.858,74.565,135.311,33.858


## Generate DataFrame for all shear-zone usable shearzone measurement points
The dataset on borehole -- shear-zone intersections only contain information on shear-zone sets, not individual shear-zones. However, there exists more shear-zone measurements than interpolated shear-zones. \
In other words, some shear-zones are located multiple times along the same borehole.

The ISC visualization tool only uses certain intersections for their interpolation. \
The datasets in `06_ShearzoneInterpolations > Sx_y.txt` maps 1 shearzone to at most 1 borehole (sometimes no intersection). \
We will use the same intersections in our analysis - thus disregarding the rest.

### Flag rows of master df that match in `borehole` and `depth` with `shearzone_borehole_data`

In [130]:
sz_bh = cls.shearzone_borehole_data()
sz_bh.head()

Unnamed: 0,borehole,depth,shearzone
0,INJ1,34.92,S1_1
1,INJ2,30.99,S1_1
2,FBS1,31.09,S1_1
3,FBS2,,S1_1
4,FBS3,19.42,S1_1


In [131]:
sz_bh = sz_bh[sz_bh['depth'].notna()]
sz_bh.head()

Unnamed: 0,borehole,depth,shearzone
0,INJ1,34.92,S1_1
1,INJ2,30.99,S1_1
2,FBS1,31.09,S1_1
4,FBS3,19.42,S1_1
5,PRP1,33.04,S1_1


In [132]:
strc.head(2)

Unnamed: 0,depth,azimuth_struc,dip,aperture,type,borehole,x,y,z,length,diameter,azimuth_bh,upward_gradient
0,2.35,169.3,57.47,334.61,Minor ductile Shear-zone,FBS1,667466.424,158888.882,1732.782,44.8,101,317.05,-31.92
1,2.99,162.65,58.83,35.7,Minor ductile Shear-zone,FBS1,667466.424,158888.882,1732.782,44.8,101,317.05,-31.92


In [140]:
strc_mrg = strc.merge(sz_bh, how='outer', on=['borehole', 'depth'], validate='m:1')
sz = strc_mrg[strc_mrg.shearzone.notna()]
sz

Unnamed: 0,depth,azimuth_struc,dip,aperture,type,borehole,x,y,z,length,diameter,azimuth_bh,upward_gradient,shearzone
18,23.34,140.51,26.14,235.14,S3 Shear-zone,FBS1,667466.424,158888.882,1732.782,44.8,101.0,317.05,-31.92,S3_1
29,27.83,175.64,38.55,298.08,S3 Shear-zone,FBS1,667466.424,158888.882,1732.782,44.8,101.0,317.05,-31.92,S3_2
42,31.09,168.02,75.22,847.39,S1 Shear-zone,FBS1,667466.424,158888.882,1732.782,44.8,101.0,317.05,-31.92,S1_1
50,36.05,170.44,53.09,1228.13,S1 Shear-zone,FBS1,667466.424,158888.882,1732.782,44.8,101.0,317.05,-31.92,S1_2
62,42.44,164.13,74.91,1513.55,S1 Shear-zone,FBS1,667466.424,158888.882,1732.782,44.8,101.0,317.05,-31.92,S1_3
105,19.42,157.49,74.12,40.9,S1 Shear-zone,FBS3,667471.317,158926.213,1732.263,44.0,101.0,219.92,-37.33,S1_1
114,33.39,185.09,70.3,92.0,S3 Shear-zone,FBS3,667471.317,158926.213,1732.263,44.0,101.0,219.92,-37.33,S3_2
137,42.05,176.65,86.84,179.31,S3 Shear-zone,FBS3,667471.317,158926.213,1732.263,44.0,101.0,219.92,-37.33,S3_1
173,20.37,183.35,70.51,157.31,S3 Shear-zone,SBH4,667468.729,158892.774,1733.91,23.9,101.0,320.0,5.0,S3_1
199,23.79,172.63,74.4,112.98,S3 Shear-zone,SBH4,667468.729,158892.774,1733.91,23.9,101.0,320.0,5.0,S3_2


## Issue: One value is not matched up... 

In [134]:
sz[sz.type.isna()]

Unnamed: 0,depth,azimuth_struc,dip,aperture,type,borehole,x,y,z,length,diameter,azimuth_bh,upward_gradient,shearzone
591,25.58,,,,,PRP3,,,,,,,,S1_1


In [135]:
sz_bh[sz_bh.borehole == 'PRP3'] 

Unnamed: 0,borehole,depth,shearzone
7,PRP3,25.58,S1_1
22,PRP3,31.14,S1_2
52,PRP3,15.57,S3_1
67,PRP3,19.34,S3_2


In [136]:
t0 = cls.borehole_shearzone_data()
t0[(t0.borehole == 'PRP3') & (t0.type.isin(['S1 Shear-zone']))]

Unnamed: 0,depth,azimuth,dip,aperture,type,borehole
478,21.18,178.1,72.9,1612.92,S1 Shear-zone,PRP3
483,25.57,176.5,55.72,1539.04,S1 Shear-zone,PRP3
486,31.14,166.4,54.9,1201.46,S1 Shear-zone,PRP3


### Explanation for why it isn't matched
As we can see, $25.58 \neq 25.57$, so the match is not found. \
We solve this by merging with `pd.merge_asof`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.merge_asof.html.

In [137]:
t = pd.merge_asof(strc.sort_values('depth'),
                  sz_bh.sort_values('depth'),
                  by='borehole', 
                  on='depth',
                  tolerance=0.01, 
                  direction='nearest')
tt = t[t.shearzone.notna()]
t3 = tt[tt['borehole'] == 'PRP3']

In [138]:
tt = tt[tt.type.isin(['S1 Shear-zone', 'S3 Shear-zone', 'Minor ductile Shear-zone'])]
tt[tt['borehole'] == 'PRP3']

Unnamed: 0,depth,azimuth_struc,dip,aperture,type,borehole,x,y,z,length,diameter,azimuth_bh,upward_gradient,shearzone
151,15.57,185.46,60.58,308.85,S3 Shear-zone,PRP3,667468.39,158892.66,1733.1,32.33,131,335.01,-25.0,S3_1
216,19.34,166.63,56.55,315.75,S3 Shear-zone,PRP3,667468.39,158892.66,1733.1,32.33,131,335.01,-25.0,S3_2
336,25.57,176.5,55.72,1539.04,S1 Shear-zone,PRP3,667468.39,158892.66,1733.1,32.33,131,335.01,-25.0,S1_1
415,31.14,166.4,54.9,1201.46,S1 Shear-zone,PRP3,667468.39,158892.66,1733.1,32.33,131,335.01,-25.0,S1_2


In [139]:
tt.shape

(39, 14)

In [22]:
sz[sz['borehole']=='PRP3']

Unnamed: 0,depth,azimuth_struc,dip,aperture,type,borehole,x,y,z,length,diameter,azimuth_bh,upward_gradient,shearzone
465,15.57,185.46,60.58,308.85,S3 Shear-zone,PRP3,667468.39,158892.66,1733.1,32.33,131.0,335.01,-25.0,S3_1
473,19.34,166.63,56.55,315.75,S3 Shear-zone,PRP3,667468.39,158892.66,1733.1,32.33,131.0,335.01,-25.0,S3_2
486,31.14,166.4,54.9,1201.46,S1 Shear-zone,PRP3,667468.39,158892.66,1733.1,32.33,131.0,335.01,-25.0,S1_2
591,25.58,,,,,PRP3,,,,,,,,S1_1


In [23]:
sz.shape

(41, 14)

### _Issue:_ `merge_asof` brings along something of `type = fracture` 
See index 152 of `tt[tt['borehole'] == 'PRP3']`.

## Add type to shear-zone -- borehole data
Then, merge on both `borehole` and `type` to ensure that no fractures are counted.


In [24]:
# Import shear-zone -- borehole data.
sz_bh = cls.shearzone_borehole_data()
sz_bh = sz_bh[sz_bh['depth'].notna()]  # Remove NaN's.

# Add column representing shear-zone set.
sz_bh['type'] = sz_bh['shearzone'].str[:2] + ' Shear-zone'
sz_bh.head(3)

Unnamed: 0,borehole,depth,shearzone,type
0,INJ1,34.92,S1_1,S1 Shear-zone
1,INJ2,30.99,S1_1,S1 Shear-zone
2,FBS1,31.09,S1_1,S1 Shear-zone


In [25]:
strc = cls.borehole_structure_data().merge(cls.borehole_data(), 
                                                 how='outer', 
                                                 on='borehole',
                                                 suffixes=('_struc', '_bh'), 
                                                 validate='m:1')
strc.head(3)

Unnamed: 0,depth,azimuth_struc,dip,aperture,type,borehole,x,y,z,length,diameter,azimuth_bh,upward_gradient
0,2.35,169.3,57.47,334.61,Minor ductile Shear-zone,FBS1,667466.424,158888.882,1732.782,44.8,101,317.05,-31.92
1,2.99,162.65,58.83,35.7,Minor ductile Shear-zone,FBS1,667466.424,158888.882,1732.782,44.8,101,317.05,-31.92
2,3.27,172.09,58.07,0.0,Foliation,FBS1,667466.424,158888.882,1732.782,44.8,101,317.05,-31.92


In [26]:
merged = pd.merge_asof(strc.sort_values('depth'),
                  sz_bh.sort_values('depth'),
                  by=['borehole', 'type'], 
                  on='depth',
                  tolerance=0.01, 
                  direction='nearest')
merged.head(2)

Unnamed: 0,depth,azimuth_struc,dip,aperture,type,borehole,x,y,z,length,diameter,azimuth_bh,upward_gradient,shearzone
0,1.76,239.29,86.44,1.89,Fracture,SBH3,667468.567,158885.383,1733.96,20.55,101,190.07,4.91,
1,2.11,78.11,21.13,0.0,Fracture,PRP3,667468.39,158892.66,1733.1,32.33,131,335.01,-25.0,


In [27]:
test_type='INJ1'

In [28]:
merged[merged.shearzone.notna()]['borehole'].value_counts().sort_index()

FBS1    5
FBS3    3
GEO3    1
GEO4    3
INJ1    5
INJ2    5
PRP1    5
PRP2    5
PRP3    4
SBH4    2
Name: borehole, dtype: int64

In [29]:
sz_bh['borehole'].value_counts().sort_index()

FBS1    5
FBS3    3
GEO3    2
GEO4    3
INJ1    5
INJ2    5
PRP1    5
PRP2    5
PRP3    4
SBH4    2
Name: borehole, dtype: int64

Notice the discreprancy in `GEO3`, above.

In [30]:
merged[(merged.shearzone.notna()) & (merged.borehole=='GEO3')]

Unnamed: 0,depth,azimuth_struc,dip,aperture,type,borehole,x,y,z,length,diameter,azimuth_bh,upward_gradient,shearzone
199,18.63,165.74,74.19,1244.19,S1 Shear-zone,GEO3,667470.923,158912.008,1732.416,30.1,86,269.96,-47.8,S1_1


In [31]:
sz_bh[sz_bh.borehole=='GEO3']

Unnamed: 0,borehole,depth,shearzone,type
10,GEO3,18.63,S1_1,S1 Shear-zone
25,GEO3,26.49,S1_2,S1 Shear-zone


In [32]:
strc[(strc.borehole=='GEO3')].tail()

Unnamed: 0,depth,azimuth_struc,dip,aperture,type,borehole,x,y,z,length,diameter,azimuth_bh,upward_gradient
542,25.32,150.68,85.56,0.0,Fracture,GEO3,667470.923,158912.008,1732.416,30.1,86,269.96,-47.8
543,25.37,159.5,78.99,0.0,Fracture,GEO3,667470.923,158912.008,1732.416,30.1,86,269.96,-47.8
544,26.49,165.79,73.15,380.94,Minor ductile Shear-zone,GEO3,667470.923,158912.008,1732.416,30.1,86,269.96,-47.8
545,26.73,160.03,82.12,0.0,Fracture,GEO3,667470.923,158912.008,1732.416,30.1,86,269.96,-47.8
546,27.66,161.16,66.69,0.0,Fracture,GEO3,667470.923,158912.008,1732.416,30.1,86,269.96,-47.8


Notice, above, we were missing something on depth $26.49$. This is exactly the structure classified as a `Minor ductile Shear-zone`

## This method, above, fails under the assumption that all shear zones are either `S1 Shear-zone` or `S3 Shear-zone`

--------------------
# Next attempt

In [96]:
# Import shear-zone -- borehole data.
sz_bh = cls.shearzone_borehole_data()
sz_bh = sz_bh[sz_bh['depth'].notna()]  # Remove rows with no intersection.
sz_bh

Unnamed: 0,borehole,depth,shearzone
0,INJ1,34.92,S1_1
1,INJ2,30.99,S1_1
2,FBS1,31.09,S1_1
4,FBS3,19.42,S1_1
5,PRP1,33.04,S1_1
6,PRP2,30.1,S1_1
7,PRP3,25.58,S1_1
10,GEO3,18.63,S1_1
11,GEO4,19.52,S1_1
15,INJ1,39.11,S1_2


In [117]:
strc = cls.borehole_structure_data().merge(cls.borehole_data(), how='outer', on='borehole', 
                                           suffixes=('_struc', '_bh'), validate='m:1')
strc['shearzone'] = np.nan
strc1 = strc.loc[:,['depth', 'type', 'borehole', 'shearzone']]
strc1.head()

Unnamed: 0,depth,type,borehole,shearzone
0,2.35,Minor ductile Shear-zone,FBS1,
1,2.99,Minor ductile Shear-zone,FBS1,
2,3.27,Foliation,FBS1,
3,5.02,Minor ductile Shear-zone,FBS1,
4,6.76,Foliation,FBS1,


In [115]:
# This mask filters potential shear-zones with other structures.
_mask_sz = strc.loc[:,'type'].isin(['Minor ductile Shear-zone', 'S1 Shear-zone', 'S3 Shear-zone']).values



In [116]:
strc.loc[_mask_sz, '']

Index(['depth', 'azimuth_struc', 'dip', 'aperture', 'type', 'borehole', 'x',
       'y', 'z', 'length', 'diameter', 'azimuth_bh', 'upward_gradient',
       'shearzone'],
      dtype='object')

#### Next:
Merge the `structure` data with the `shearzone-borehole` data. \
Note that we have drastically increased the number of rows. This is because merge solely based on matching borehole. \
Therefore, we need to remove to superfluous data before proceeding.

In [106]:
# Merge borehole-structures with shearzone-boreholes.
strc_mrg = strc.merge(sz_bh, how='outer', on=['borehole', 'depth'])

sz = strc_mrg[strc_mrg.shearzone.notna()]  # I don't quite understand/remember why this filter is here.
sz.type.value_counts(dropna=False)

S1 Shear-zone               21
S3 Shear-zone               16
Fracture                     2
Minor ductile Shear-zone     1
NaN                          1
Name: type, dtype: int64

In [107]:
strc_mrg.type.value_counts(dropna=False)

Fracture                    412
Quartz                       67
S1 Shear-zone                34
Foliation                    29
S3 Shear-zone                28
Minor ductile Shear-zone     15
Biotite Band                  6
NaN                           1
Name: type, dtype: int64

In [100]:
sz.head(3)

Unnamed: 0,depth_x,azimuth_struc,dip,aperture,type,borehole,x,y,z,length,diameter,azimuth_bh,upward_gradient,depth_y,shearzone
0,2.35,169.3,57.47,334.61,Minor ductile Shear-zone,FBS1,667466.424,158888.882,1732.782,44.8,101,317.05,-31.92,31.09,S1_1
1,2.35,169.3,57.47,334.61,Minor ductile Shear-zone,FBS1,667466.424,158888.882,1732.782,44.8,101,317.05,-31.92,36.05,S1_2
2,2.35,169.3,57.47,334.61,Minor ductile Shear-zone,FBS1,667466.424,158888.882,1732.782,44.8,101,317.05,-31.92,42.44,S1_3


In [101]:
_mask = np.isclose(sz.depth_x.values, sz.depth_y.values, atol=0.01)
_mask2 = sz.type.isin(['Minor ductile Shear-zone', 'S1 Shear-zone', 'S3 Shear-zone'])
sz2 = sz[_mask]

In [102]:
sz2.shape

(42, 15)

In [105]:
sz[_mask & _mask2].shape

(39, 15)

#### This `_mask` finds the shear zones used for computer simulations 
We expect 39 locations for "real" shear-zones.

In [89]:
_mask = (np.isclose(sz.depth_x.values, sz.depth_y.values, atol=0.01) & 
        (sz.type.isin(['Minor ductile Shear-zone', 'S1 Shear-zone', 'S3 Shear-zone'])))

_mask.value_counts()

False    2097
True       39
Name: type, dtype: int64

In [53]:
sz.loc[~_mask.values, 'shearzone'] = np.nan

sz.shearzone.value_counts(dropna=False)

NaN     2097
S1_1       9
S3_2       8
S1_2       8
S3_1       8
S1_3       6
Name: shearzone, dtype: int64

In [54]:
sz

Unnamed: 0,depth_x,azimuth_struc,dip,aperture,type,borehole,x,y,z,length,diameter,azimuth_bh,upward_gradient,depth_y,shearzone
0,2.35,169.30,57.47,334.61,Minor ductile Shear-zone,FBS1,667466.424,158888.882,1732.782,44.80,101,317.05,-31.92,31.09,
1,2.35,169.30,57.47,334.61,Minor ductile Shear-zone,FBS1,667466.424,158888.882,1732.782,44.80,101,317.05,-31.92,36.05,
2,2.35,169.30,57.47,334.61,Minor ductile Shear-zone,FBS1,667466.424,158888.882,1732.782,44.80,101,317.05,-31.92,42.44,
3,2.35,169.30,57.47,334.61,Minor ductile Shear-zone,FBS1,667466.424,158888.882,1732.782,44.80,101,317.05,-31.92,23.34,
4,2.35,169.30,57.47,334.61,Minor ductile Shear-zone,FBS1,667466.424,158888.882,1732.782,44.80,101,317.05,-31.92,27.83,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2210,38.32,160.06,74.77,0.00,Fracture,GEO4,667470.776,158912.000,1732.618,40.05,86,270.03,-34.22,28.05,
2211,38.32,160.06,74.77,0.00,Fracture,GEO4,667470.776,158912.000,1732.618,40.05,86,270.03,-34.22,35.62,
2212,39.05,180.71,64.95,0.00,Fracture,GEO4,667470.776,158912.000,1732.618,40.05,86,270.03,-34.22,19.52,
2213,39.05,180.71,64.95,0.00,Fracture,GEO4,667470.776,158912.000,1732.618,40.05,86,270.03,-34.22,28.05,


# IntervalIndex 
See: https://stackoverflow.com/a/46772977

In [124]:
# Import shear-zone -- borehole data.
sz_bh = cls.shearzone_borehole_data()
df1 = sz_bh[sz_bh['depth'].notna()]  # Remove rows with no intersection.
df1.head()

Unnamed: 0,borehole,depth,shearzone
0,INJ1,34.92,S1_1
1,INJ2,30.99,S1_1
2,FBS1,31.09,S1_1
4,FBS3,19.42,S1_1
5,PRP1,33.04,S1_1


In [125]:
strc = cls.borehole_structure_data().merge(cls.borehole_data(), how='outer', on='borehole', 
                                           suffixes=('_struc', '_bh'), validate='m:1')
# strc['shearzone'] = np.nan
df2 = strc.loc[:,('depth', 'type', 'borehole')]
df2.head()

Unnamed: 0,depth,type,borehole
0,2.35,Minor ductile Shear-zone,FBS1
1,2.99,Minor ductile Shear-zone,FBS1
2,3.27,Foliation,FBS1
3,5.02,Minor ductile Shear-zone,FBS1
4,6.76,Foliation,FBS1


In [129]:
pd.merge_asof(df2.sort_values('depth'), df1.sort_values('depth'), 
              direction='nearest', 
              on='depth', 
              by='borehole').sort_values('shearzone').drop_duplicates('shearzone')

Unnamed: 0,depth,type,borehole,shearzone
243,20.65,Fracture,GEO4,S1_1
451,34.35,S1 Shear-zone,INJ2,S1_2
588,47.36,Fracture,PRP1,S1_3
333,25.51,Fracture,PRP1,S3_1
290,22.99,Foliation,SBH4,S3_2
0,1.76,Fracture,SBH3,


In [128]:
tol=0.011
df2.index = pd.IntervalIndex.from_arrays(df2['depth']-tol, df2['depth']+tol, closed='both')
df1['shearzone']

Unnamed: 0,depth,type,borehole
"[2.339, 2.361]",2.35,Minor ductile Shear-zone,FBS1
"[2.979, 3.0010000000000003]",2.99,Minor ductile Shear-zone,FBS1
"[3.259, 3.281]",3.27,Foliation,FBS1
"[5.0089999999999995, 5.031]",5.02,Minor ductile Shear-zone,FBS1
"[6.749, 6.771]",6.76,Foliation,FBS1


df1 has [A,C] df2 has [A,B]

do tolerance on df2.A to include values from df1.