# Combined Kriging Class

In [1]:
# These lines are necessary only if GemPy is not installed
import sys, os
sys.path.append("../../..")
os.environ['CUDA_LAUNCH_BLOCKING'] = '1'
os.environ['MKL_THREADING_LAYER'] = 'GNU'

# Importing GemPy, which takes really long
import gempy as gp

# Importing auxiliary libraries
import numpy as np
from matplotlib import pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import pandas as pd
import scipy.stats
from scipy import spatial
from scipy import optimize
from scipy import special

  from ._conv import register_converters as _register_converters


In [2]:
class Kriging():
    # What do I want: 
    # passing: Data with properties, layer, geo_data --> meaning that I have to cut everything in here!
    # possibility to read in data - check gempy how to do best
    # analyzing data part needs to be in here somewhere, have to check how "OK" that is,
    # and if I can switch between covariance function (SK) and variogram function (OK, UK) in a reasonable manner
    # Selection:
    # SK, OK, UK
    # cdist or my distance
    # SGS or not?
    def __init__(self, **kwds):
    # here I want to put the basic variables and also the data analysis
        if '' in kwds:
            self.set_nxny(kwds['nx'], kwds['ny'])
    
    def get_data():
        # method to read data from given csv and create pandas dataframe
        
    def set_domain():
        # cut grid and domain data to the given layer
    
    def analyse_data():
        # method for getting mean, std, etc. of the given dataset
        
    def precompute_distances()
        # method to precompute distances from each grid point to each other grid point
        # depending wether euclidian or non-euclidian distance is checked
        return dist_matrix
    
    def get_distance_matrices(dist_matrix, target_point, n):
        """
        Method to get closest points out of distance matrix calculated beforehand.
        Args:
            dist_matrix (numpy.ndarray): matrix of distances between existing property data points and target point
            n(int): number of closest points that should be used for Kriging
            target_point(int): index of target point, within given extracted matrix
        Returns
            dist_close_all_to_all (numpy.ndarray)(n,n): distance matrix each to each of n closest data points
            dist_close_target_to_all (numpy.ndarray)(n,): distance matrix of target to n closest data points
        """
        # check for n closest points in matrix (minimum distance values) and sort the resulting index array
        ind = np.argpartition(dist_matrix[target_point], n+1)[:n+1]
        sort_ind = np.sort(ind)

        # extract distances from target point (row of target index)
        dist_close_target_to_all = dist_matrix[target_point][sort_ind]

        # find new index of target point (and save it for second part) and delete value to get correct distance matrix
        aux = np.where(dist_close_target_to_all == 0)[0][0]
        dist_close_target_to_all = dist_close_target_to_all[dist_close_target_to_all != 0] 

        # extract distance each to each for those closest points, delete target point index row and column
        dist_close_all_to_all = dist_matrix[np.ix_(sort_ind,sort_ind)]
        dist_close_all_to_all = np.delete(dist_close_all_to_all, aux, axis=0)
        dist_close_all_to_all = np.delete(dist_close_all_to_all, aux, axis=1)

        return dist_close_target_to_all, dist_close_all_to_all
    
    def simple_kriging():
        # def simple_kriging(point, mean, data, std):
    
        # be aware thats already only the closest points from pandas dataframe
        data_m = data.as_matrix(('X','Y','Z'))
        data_v = data['Property'].values
        
        # this has to be checked if I use both distance algortihms
        data_d = data['dist'].values # take dist from closest points selection
     
        # empty matrix building
        shape = len(data_m) 
        C = np.zeros((shape, shape))
        c = np.zeros((shape))
        w = np.zeros((shape))
    
        # Faster matrix building approach, no loops
        dist = spatial.distance.cdist(data_m, data_m) #distance between all sampled points
        C[:shape, :shape] = gaussian_cov_model(dist)
        c[:shape] = gaussian_cov_model(data_d)
    
        # nugget effect, I have to do this properly! (How?)
        np.fill_diagonal(C, 10)
        
        # solve for weights
        w = np.linalg.solve(C,c)
    
        # SGS version - taking result from normal distribution with kriging mean an standard deviation
        result = np.random.normal(mean + np.sum(w * (data_v-mean)), scale = 0) #scale=np.sqrt(variance-np.sum(w*c)))
        # if I use other scale it gets wild
    
        # direct version, calculating result from weights. Need to be normed to one
        # result = mean + np.sum(w * (data_v-mean))
    
        return result
    
    def ordinary_kriging():
        #def ordinary_kriging(point, data):
    
        # be aware thats already only the closest points from pandas dataframe
        data_m = data.as_matrix(('X','Y','Z'))
        data_v = data['Property'].values
        
        # this has to be checked if I use both distance algortihms
        data_d = data['dist'].values #take dist from closest points selection
    
        # empty matrix building
        shape = len(data_m)
        C = np.zeros((shape+1, shape+1))
        c = np.zeros((shape+1))
        w = np.zeros((shape+1))
    
        # Faster matrix building approach, no loops
        dist = spatial.distance.cdist(data_m, data_m) #distance between all sampled points
        C[:shape, :shape] = gaussian_variogram_model(dist)
        c[:shape] = gaussian_variogram_model(data_d)
    
        # matrix setup - compare pykrige, special for OK
        np.fill_diagonal(C, 0)
        C[shape, :] = 1.0
        C[:, shape] = 1.0
        C[shape, shape] = 0.0  
        c[shape] = 1.0

        # Solve Kriging equations
        w = np.linalg.solve(C,c)
    
        # SGS version - not correct yet, need to get mean and std
        result = np.random.normal(np.sum(w[:shape] * data_v), scale=np.sqrt(w[shape]-gaussian_variogram_model(0)+np.sum(w[:shape]*c[:shape])))
    
        # direct version, calculating result from weights.
        # result = np.sum(w[:shape] * data_v)
    
        return result
    
    def universal_kriging():
        #def ordinary_kriging(point, data):
    
        # be aware thats already only the closest points from pandas dataframe
        data_m = data.as_matrix(('X','Y','Z'))
        data_v = data['Property'].values
        
        # this has to be checked if I use both distance algortihms
        data_d = data['dist'].values #take dist from closest points selection
    
        # empty matrix building
        shape = len(data_m)
        C = np.zeros((shape+1, shape+1))
        c = np.zeros((shape+1))
        w = np.zeros((shape+1))
    
        # Faster matrix building approach, no loops
        dist = spatial.distance.cdist(data_m, data_m) #distance between all sampled points
        C[:shape, :shape] = gaussian_variogram_model(dist)
        c[:shape] = gaussian_variogram_model(data_d)
    
        # matrix setup - compare pykrige
        np.fill_diagonal(C, 0)
        C[shape, :] = 1.0
        C[:, shape] = 1.0
        C[shape, shape] = 0.0  
        c[shape] = 1.0
    
        # additional matrices for universal kriging, containing Coordinates and zeros
        aux1 = np.vstack((data_m, np.zeros((1, 3))))
        aux2 = np.hstack((np.transpose(data_m), np.zeros((3,4))))
    
        # adding auxiliary matrices to the kriging matrices
        C = np.hstack((C, aux1))
        C = np.vstack((C, aux2))
        c = np.hstack((c, point))

        # Solve Kriging equations
        w = np.linalg.solve(C,c)
    
        # SGS version - in UK case the scale (standard deviation) is not yet implemented/correct
        #result = np.random.normal(np.sum(w[:shape] * data_v), scale=np.sqrt(w[shape]-gaussian_variogram_model(0)+np.sum(w[:shape]*c[:shape])))
    
        # direct version, calculating result from weights.
        result = np.sum(w[:shape] * data_v)
    
        return result
        
    def sgs():
    # def perform_sgs(property_data, sgs_grid):
    """
    Method to perform the SGS run, creating a new dataframe with the resulting grid, 
    containing the interpolated data fro the CoKriging
    Args:
        property_data (pandas.datframe): frame containing property data with corresponding X,Y,Z coordinates
        sgs_grid (pandas.dataframe): frame containing grid, meaning all X,Y,Z coordinates in domain 
    Returns:
        Result: Dataframe containg all interpolated values with corresponding coordinates       
    """
    results = pd.DataFrame(columns=property_data.columns)
    df_prop_calc = property_data.copy() #copy dataframe for appending results for SGS
    df_sgs_grid_calc = sgs_grid.copy() #copy of grid frame to delete data
    
    # array with property indices and properties (n,2)
    
    sgs_check = np.arange(0,number_of_grid_points)
    
    for i in range(0, len(sgs_grid)):
        
        # choose random point from df_sgs_grid_calc
        rand_pos = np.random.randint(0,len(df_sgs_grid_calc))
        random_coord = df_sgs_grid_calc.iloc[rand_pos].as_matrix(('X','Y','Z'))
        
        rand_coord_aux = np.array([random_coord])
        
        # delete point from the df sgs_grid_calc
        # maybe faster way without deleting, an array with only the indices and delete from that 
        df_sgs_grid_calc = df_sgs_grid_calc.drop([rand_pos])
        df_sgs_grid_calc = df_sgs_grid_calc.reset_index() # reset indicies
        del df_sgs_grid_calc['index'] #reset indices
        
        # only select n closest points for Cokriging,best by selection to make it work with miguels code
        df_prop_calc_close = select_closest_points(rand_coord_aux, df_prop_calc)
        
        # perform the Kriging interpolation on this point
        kriging_result = ordinary_kriging(target_point) 
        
        # set coordinates in result array
        new_interpolated_point = ([random_coord[0], random_coord[1],random_coord[2]])
        
        # add property interpolation to results array, depends a little bit on form of Kriging results
        new_interpolated_point.append(kriging_results)
                            
        # append result to df_prop_calc to use for further SGS
        df_prop_calc.loc[len(df_prop_calc)]=new_interpolated_point
        
        # append results to final results dataframe that will be returned
        results.loc[len(results)]=new_interpolated_point
        
    return results

SyntaxError: unexpected EOF while parsing (<ipython-input-2-1e4244081fa7>, line 6)