# Filter on full dataset
This notebook experiments with methods to filter specific (useful) information from the full dataset.

In [8]:
%load_ext autoreload
%autoreload 2
import sys
# NOTE(review): absolute, machine-specific paths. They are added to the import search path,
# presumably so that `import GTS` resolves to the local project sources; adjust them before
# running this notebook on another machine.
sys.path.extend([
    'C:/Users/Haakon/OneDrive/Dokumenter/FORSKNING/mastersproject/src/mastersproject',
    'C:/Users/Haakon/OneDrive/Dokumenter/FORSKNING/mastersproject/src/mastersproject/GTS'
])

In [27]:
import numpy as np
import pandas as pd

import GTS as gts

In [10]:
# Data-access object from the project's GTS package; the next cell uses it to build the working frame.
cls = gts.ISCData()

In [11]:
# Working frame for the whole notebook: the cells and helper functions below read this global `df`.
df = cls.full_structure_geometry()

In [155]:
# Fails: `df.shearzone` is a Series, which has no `.type` attribute (see the AttributeError below).
# The structure type is stored in the separate `type` column of `df`.
df[df.shearzone.type.isin(['S3 Shear-zone', 'S1 Shear-zone'])]

AttributeError: 'Series' object has no attribute 'type'

In [None]:
# Select every intersection logged as an S1 or S3 shear-zone structure.
# The structure type is the `type` column of `df`; `df.shearzone` holds the shear-zone
# label (S1_1, S1_2, ...) and has no `.type` attribute, so filter on `type` directly.
df[df['type'].isin(['S3 Shear-zone', 'S1 Shear-zone'])]

## Some shear-zone -- borehole intersections are not used for calculating planes
Consider borehole FBS2. Here we see that none of the `S1 Shear-zone` intersections has been assigned to a shear-zone (the `shearzone` column is empty). \
This means that the files in `06ShearzoneInterpolation > Sx_y.txt` do not use these intersections in the shear-zone calculations.

In [69]:
# Rows whose structure type is one of the three shear-zone kinds ...
_mask_type = df['type'].isin(['S1 Shear-zone', 'S3 Shear-zone', 'Minor ductile Shear-zone'])
# ... that carry no shear-zone label ...
_mask_sz_nan = df['shearzone'].isna()
# ... and that were logged in borehole FBS2.
_mask_bh = df['borehole'].eq('FBS2')
# Show only the identifying columns of the rows matching all three conditions.
(
    df.loc[_mask_type & _mask_sz_nan & _mask_bh, ('depth', 'type', 'borehole', 'shearzone')]
    .sort_values('borehole')
)

Unnamed: 0,depth,type,borehole,shearzone
94,11.15,S1 Shear-zone,FBS2,
461,35.09,S1 Shear-zone,FBS2,
576,44.08,S1 Shear-zone,FBS2,


___________

# TODO:
1) Get gts coordinates for each shear zone as numpy array (3, n)

2) For a given borehole and a depth (or depth interval), locate all structures in that region - or only the shear-zones.

3) Do regression over the shearzone points to get planes (normals and vertices)
    - Construct fractures
    
4) Do regression over shearzone points with one (or two) fixed points.
    - E.g. you'd want to fix the injection point for a shearzone-borehole intersection.
    
5) Mesh the fractured domain

6) Find nearest cell to a given coordinate on the map
    - Nearest point to a given borehole depth (depth-interval)
    - Nearest point to a fracture-shearzone intersection

# 1): Get gts coordinates for each shear zone as numpy array (3, n)

* Filter df by shear-zone
* df.to_numpy()
* transpose with `.T` to get shape (3, n)

In [23]:
def get_shearzone(sz: str, coords: str = 'gts', data: pd.DataFrame = None) -> np.ndarray:
    """ Extract shear-zone coordinates for a given shear-zone

    Coordinates extracted will either be 'swiss' or 'gts'.

    Parameters:
    sz (str): Name of shear-zone (S1_1, S1_2, S1_3, S3_1, S3_2)
    coords (str, Default: 'gts'):
        Get coordinates in 'gts' or 'swiss'.
    data (pd.DataFrame, optional): Frame to filter. It must contain the column
        'shearzone' and the columns 'x_<coords>', 'y_<coords>', 'z_<coords>'.
        Default: the notebook-level frame `df`.

    Returns:
    np.ndarray (3, n): Coordinates of shearzone intersections, one row per axis
        (x, y, z) and one column per intersection.

    Raises:
    AssertionError: If `sz` or `coords` is not one of the accepted names.
    """
    assert sz in ['S1_1', 'S1_2', 'S1_3', 'S3_1', 'S3_2'], f"unknown shear-zone {sz}."
    assert coords in ['swiss', 'gts'], f"unknown coordinate system {coords}."

    # Fall back to the notebook-level frame so existing calls keep working unchanged.
    if data is None:
        data = df

    columns = [f'x_{coords}', f'y_{coords}', f'z_{coords}']
    points = data.loc[data['shearzone'] == sz, columns]

    # to_numpy() gives one row per intersection; transpose to the documented (3, n) layout.
    return points.to_numpy().T

In [24]:
# Coordinates of shear-zone S1_1 in the default 'gts' system, as a (3, n) array.
arr = get_shearzone('S1_1')

In [25]:
arr

array([[ 58.40884847,  61.40764443,  54.63522029,  58.59978767,
         50.78924812,  56.35671854,  48.44373512,  51.23287523,
         43.78967103,  72.625     ,   9.735     ],
       [111.99926347, 114.36994694, 112.00845129, 113.66474862,
        109.42440764, 110.06692009, 108.19723976, 107.51656832,
        106.94429858, 125.321     ,  88.36      ],
       [ 18.61481037,  20.48661828,  21.6404976 ,  22.29365105,
         17.17388556,  11.49011407,  16.34363975,   9.68848744,
         13.5331971 ,  33.436     ,  35.419     ]])

# 2): For a given borehole and a depth (or depth interval), locate all structures in that region - or only the shear-zones.
* Filter by borehole
* filter by absolute depth or depth-interval
* optionally filter by structure-type

In [28]:
df.columns

Index(['depth', 'azimuth_struc', 'dip', 'aperture', 'type', 'borehole', 'x',
       'y', 'z', 'length', 'diameter', 'azimuth_bh', 'upward_gradient',
       'shearzone', '_trig_x', '_trig_y', '_trig_z', 'x_swiss', 'y_swiss',
       'z_swiss', 'x_gts', 'y_gts', 'z_gts'],
      dtype='object')

In [47]:
def structures_depth(borehole: str, depth: np.ndarray, structure=None, shearzone=None, coords='gts', data=None):
    """ Get structures in a borehole at depth

    For a given borehole, and a given depth (or depth interval),
    get all structures - or a subset of structures, or specific shearzones.

    Parameters:
    borehole (str): name of borehole (INJ1, INJ2, ...)
    depth (array-like or scalar): Depth interval (lower, upper) in the borehole,
        both bounds inclusive. A single depth d is treated as the interval (d, d).
    structure (str or list, Optional): Filter by certain structures
        (Fracture, Minor ductile Shear-zone, S1 Shear-zone, Quartz, ...)
    shearzone (str or list, Optional): Filter by certain shear-zones
        (S1_1, S1_2, ...)
    coords (str, optional): which coordinate system to return ('gts' or 'swiss')
    data (pd.DataFrame, optional): Frame to filter. Default: the notebook-level frame `df`.

    Returns:
    pd.DataFrame: Filtered dataframe with the columns 'depth', 'azimuth_struc', 'dip',
        'aperture', 'type', 'borehole', 'shearzone' and the x/y/z columns of `coords`.

    Raises:
    AssertionError: If `depth` is not a single value or an ordered pair,
        or if `coords` is not 'swiss' or 'gts'.
    """
    # Accept lists, tuples and scalars as well as arrays; the original required an ndarray.
    bounds = np.atleast_1d(np.asarray(depth)).ravel()
    if bounds.size == 1:
        # A single depth is the degenerate interval [d, d].
        bounds = np.repeat(bounds, 2)
    assert (bounds.size == 2) and (bounds[0] <= bounds[1]), "Depth must be given as an interval."
    assert coords in ['swiss', 'gts'], f"unknown coordinate system {coords}."
    lower, upper = bounds

    # Fall back to the notebook-level frame so existing calls keep working unchanged.
    if data is None:
        data = df

    # Mandatory filters: borehole name and inclusive depth interval.
    mask = (data['borehole'] == borehole) & (data['depth'] >= lower) & (data['depth'] <= upper)

    # Optional filter on structure type; a single name is treated as a one-element list.
    if structure is not None:
        if isinstance(structure, str):
            structure = [structure]
        mask = mask & data['type'].isin(structure)

    # Optional filter on shear-zone label; a single name is treated as a one-element list.
    if shearzone is not None:
        if isinstance(shearzone, str):
            shearzone = [shearzone]
        mask = mask & data['shearzone'].isin(shearzone)

    columns = ['depth', 'azimuth_struc', 'dip', 'aperture', 'type',
               'borehole', 'shearzone',
               f'x_{coords}', f'y_{coords}', f'z_{coords}']
    return data.loc[mask, columns]

In [62]:
# All structures logged in borehole GEO3 with a `depth` value between 25 and 28 (inclusive).
# NOTE(review): despite its name, `arr1` is a DataFrame, not an array.
arr1 = structures_depth(borehole='GEO3', depth=np.array([25,28]))
arr1

Unnamed: 0,depth,azimuth_struc,dip,aperture,type,borehole,shearzone,x_gts,y_gts,z_gts
327,25.32,150.68,85.56,0.0,Fracture,GEO3,,53.915039,111.996126,13.658828
330,25.37,159.5,78.99,0.0,Fracture,GEO3,,53.881453,111.996103,13.621787
345,26.49,165.79,73.15,380.94,Minor ductile Shear-zone,GEO3,S1_2,53.129126,111.995578,12.792086
352,26.73,160.03,82.12,0.0,Fracture,GEO3,,52.967913,111.995465,12.614293
365,27.66,161.16,66.69,0.0,Fracture,GEO3,,52.343213,111.995029,11.925345


# 3): Do regression over the shearzone points to get planes (normals and vertices) 
* Get numpy array of points for each shear-zone (use `get_shearzone()` method).
* Calculate shear-zone planes with linear regression.

_The first attempt below (`shearzone_plane`, commented out) returns a dataframe_. Let's put that tactic on pause for a moment; `convex_plane` returns a dict instead.

In [123]:
# def shearzone_plane(shearzone, coords='gts'):
#     """ Calculate shear-zone planes from shear-zones.
    
#     Given a reference to a shear-zone, calculate the best-fit plane.
    
#     Parameters:
#     shearzone (str or list): Names of shearzones to construct planes of (S1_1, S1_2, ...)
#     coords (str, optional): Coordinate system to use. Default: 'gts'.
    
#     Returns:
#     pd.DataFrame: Dataframe with columns x,y,z coordinates, type ('proj' or 'n') and shearzone ('S1_1', etc.)
#     e.g.:
#        x_gts       y_gts       z_gts       type    shearzone
#     0  87.806066   61.332718   34.064036   proj    S1_1
#     1  90.829214   63.661430   35.948641   proj    S1_1
#     2  -0.485260   0.836353    -0.255021   n       S1_1
    
#     """
#     if isinstance(shearzone, str):
#         shearzone = [shearzone]
#     elif shearzone is None:
#         shearzone = ['S1_1', 'S1_2', 'S1_3', 'S3_1', 'S3_2']
    
# #     planes = {sz:{} for sz in shearzone}
#     planes = []
    
#     for sz in shearzone:
#         # Calculate point cloud projected to the fitted plane.
#         sz_cloud = get_shearzone(sz=sz, coords=coords)
#         fp = gts.FitPlane(sz_cloud)
#         proj = pd.DataFrame(fp.proj.T, columns=(f'x_{coords}', f'y_{coords}', f'z_{coords}'))
#         proj['type'] = 'proj'
#         cn={'type':'n', f'x_{coords}': fp.n[0], f'y_{coords}': fp.n[1], f'z_{coords}': fp.n[2]}
#         data = proj.append(cn, ignore_index=True)
#         data['shearzone'] = sz
#         planes.append(data)
    
#     return pd.concat(planes).reset_index(drop=True)

In [136]:
def convex_plane(shearzone=None, coords='gts'):
    """ Calculate shear-zone planes from shear-zones.

    Given a reference to one or more shear-zones, fit a plane to each
    shear-zone's point cloud with `gts.FitPlane` and compute the convex hull
    of the projected points with `gts.convex_hull`.

    Parameters:
    shearzone (str or list, optional): Names of shearzones to construct planes of
        (S1_1, S1_2, ...). Default: all of S1_1, S1_2, S1_3, S3_1, S3_2.
    coords (str, optional): Coordinate system to use. Default: 'gts'.

    Returns:
    dict: One entry per shear-zone name, each a dict with the keys
        'proj' (`FitPlane.proj`, the point cloud projected to the fitted plane),
        'n' (`FitPlane.n`, the plane normal) and
        'vertices' (result of `gts.convex_hull` on the projected points).

    NOTE(review): the check further down in this notebook found the projected
    points offset from the raw points by a roughly constant vector; the offset
    originates outside this function and is not corrected here.
    """
    # The None branch was unreachable without a default on the parameter; it now has one.
    if shearzone is None:
        shearzone = ['S1_1', 'S1_2', 'S1_3', 'S3_1', 'S3_2']
    elif isinstance(shearzone, str):
        shearzone = [shearzone]

    shearzones = {}
    # dict.fromkeys drops repeated names while keeping the requested order.
    for sz in dict.fromkeys(shearzone):
        sz_cloud = get_shearzone(sz=sz, coords=coords)
        fp = gts.FitPlane(sz_cloud)
        shearzones[sz] = {
            'proj': fp.proj,
            'n': fp.n,
            'vertices': gts.convex_hull(fp.proj),
        }

    return shearzones
        

In [138]:
# Fit a plane to shear-zone S1_1; the cells below compare the result with the raw points.
shearzones = convex_plane('S1_1')
shearzones

{'S1_1': {'proj': array([[ 87.80606576,  90.82921356,  84.55033821,  88.17271714,
           81.11402366,  86.33461738,  78.92552538,  81.6052199 ,
           75.2066799 , 102.24710506,  38.92030081],
         [ 61.33271767,  63.66143038,  60.44929596,  62.69536022,
           57.15920124,  58.39956091,  55.66141613,  55.16937582,
           52.79661162,  74.26685673,  38.05869544],
         [ 34.06403553,  35.94864115,  37.36189694,  37.83521887,
           33.11057374,  27.24450689,  32.36284444,  25.6501748 ,
           30.04389059,  49.00341124,  50.75685593]]),
  'n': array([-0.48526028,  0.83635338, -0.25502057]),
  'vertices': array([[ 38.92030081, 102.24710506,  86.33461738,  81.6052199 ],
         [ 38.05869544,  74.26685673,  58.39956091,  55.16937582],
         [ 50.75685593,  49.00341124,  27.24450689,  25.6501748 ]])}}

In [139]:
shearzones['S1_1']['proj'].T

array([[ 87.80606576,  61.33271767,  34.06403553],
       [ 90.82921356,  63.66143038,  35.94864115],
       [ 84.55033821,  60.44929596,  37.36189694],
       [ 88.17271714,  62.69536022,  37.83521887],
       [ 81.11402366,  57.15920124,  33.11057374],
       [ 86.33461738,  58.39956091,  27.24450689],
       [ 78.92552538,  55.66141613,  32.36284444],
       [ 81.6052199 ,  55.16937582,  25.6501748 ],
       [ 75.2066799 ,  52.79661162,  30.04389059],
       [102.24710506,  74.26685673,  49.00341124],
       [ 38.92030081,  38.05869544,  50.75685593]])

In [140]:
shearzones['S1_1']['vertices'].T

array([[ 38.92030081,  38.05869544,  50.75685593],
       [102.24710506,  74.26685673,  49.00341124],
       [ 86.33461738,  58.39956091,  27.24450689],
       [ 81.6052199 ,  55.16937582,  25.6501748 ]])

In [143]:
# Raw (unprojected) GTS coordinates of the S1_1 intersections, one row per intersection.
orig_s1_1 = df.loc[df.shearzone=='S1_1', ('x_gts', 'y_gts', 'z_gts')]
orig_s1_1

Unnamed: 0,x_gts,y_gts,z_gts
199,58.408848,111.999263,18.61481
218,61.407644,114.369947,20.486618
220,54.63522,112.008451,21.640498
336,58.599788,113.664749,22.293651
405,50.789248,109.424408,17.173886
411,56.356719,110.06692,11.490114
414,48.443735,108.19724,16.34364
438,51.232875,107.516568,9.688487
458,43.789671,106.944299,13.533197
591,72.625,125.321,33.436


In [146]:
# Same points as an (n, 3) array, matching the layout of `shearzones['S1_1']['proj'].T`.
s1_1 = orig_s1_1.to_numpy()
s1_1

array([[ 58.40884847, 111.99926347,  18.61481037],
       [ 61.40764443, 114.36994694,  20.48661828],
       [ 54.63522029, 112.00845129,  21.6404976 ],
       [ 58.59978767, 113.66474862,  22.29365105],
       [ 50.78924812, 109.42440764,  17.17388556],
       [ 56.35671854, 110.06692009,  11.49011407],
       [ 48.44373512, 108.19723976,  16.34363975],
       [ 51.23287523, 107.51656832,   9.68848744],
       [ 43.78967103, 106.94429858,  13.5331971 ],
       [ 72.625     , 125.321     ,  33.436     ],
       [  9.735     ,  88.36      ,  35.419     ]])

In [147]:
# Row-by-row offset between the projected points and the raw S1_1 points.
shearzones['S1_1']['proj'].T - s1_1

array([[ 29.39721729, -50.6665458 ,  15.44922516],
       [ 29.42156913, -50.70851656,  15.46202287],
       [ 29.91511792, -51.55915533,  15.72139934],
       [ 29.57292948, -50.96938841,  15.54156782],
       [ 30.32477554, -52.2652064 ,  15.93668818],
       [ 29.97789884, -51.66735918,  15.75439282],
       [ 30.48179026, -52.53582363,  16.01920469],
       [ 30.37234467, -52.3471925 ,  15.96168736],
       [ 31.41700887, -54.14768696,  16.51069349],
       [ 29.62210506, -51.05414327,  15.56741124],
       [ 29.18530081, -50.30130456,  15.33785593]])

In [150]:
# Is this constant shift the mean of the original dataset?
s1_1.mean(axis=0)

array([ 51.45670445, 109.80662225,  20.01090011])

### Found bug: The entire shearzone is translated by a roughly constant vector.
The shift is not the mean of the original points (compare the two outputs above), so find out where this vector comes from. \
Solution: Translate back!

# 6) Find nearest cell to a given coordinate on the map

- Nearest point to a given borehole depth (depth-interval)
- Nearest point to a fracture-shearzone intersection

For this method, use `pp.geometry.distances > point_pointset` which computes distance between a point and a set of points. \
Then, use the `g.cell_center` mapping to find cell index of closest point.

This cell should be tagged (find out how to tag in a good way).
