### 1. f_KDE
**Inputs:**
1. a point cloud X in dimension d of size n, matrix dimension = d x n
2. N::Int, the size of subsample for computing KDE
3. H=H_MS::Array{Float64,2}, the bandwidth parameter

**Output:**
a function: R^d→R

**Details:** uses a Gaussian kernel and, by default, the maximal-smoothing bandwidth H_MS


In [48]:
using StatsBase        ## sample
using Statistics       ## cov
using SpecialFunctions ## gamma

## Toy point cloud: 1000 random points in dimension 3, one point per column
X = rand(3,1000)
N = 1000 ## number of points the KDE is built from

d, n = size(X) ## d = dimension of the data, n = sample size
R_K = 1/((2*sqrt(pi))^d) ## R(K): integral of the squared pdf of the "normal" kernel K

## Column indices used for the estimate: a random draw without replacement when N < n,
## every column when N == n; asking for more points than available is an error
Rand_N = if N > n
    error("The subsample size is greater than the original sample size!!!")
elseif N < n
    sample(collect(1:n), N, replace = false)
else
    collect(1:N)
end

@time S = cov(X') ## sample covariance (rows of X' are the observations)

## Maximal-smoothness bandwidth: a scalar depending on d and n, times the sample covariance
@time H_MS = (
    ((d + 8)^((d + 6) / 2) * pi^(d / 2) * R_K / (16 * (d + 2) * gamma(d / 2 + 4)))^(2 / (d + 4)) *
    n^(-2 / (d + 4)) * S
)

  0.000058 seconds (9 allocations: 23.953 KiB)
  0.000023 seconds (19 allocations: 544 bytes)


3×3 Array{Float64,2}:
  0.0127172    -0.000477463  -0.000197408
 -0.000477463   0.0130877    -3.68454e-5 
 -0.000197408  -3.68454e-5    0.0129841  

In [3]:
## NormalDensity(d, mu[, Sigma]) returns the density function z -> f(z) of the
## d-dimensional normal distribution with mean `mu` and covariance `Sigma`.
##
## Arguments:
##   d     - dimension of the distribution
##   mu    - mean vector of length d
##   Sigma - d x d covariance matrix; defaults to the d x d identity
##
## Throws a DimensionMismatch when `mu` or `Sigma` does not agree with `d`.
## Requires LinearAlgebra (I, det, dot).
function NormalDensity(d::Int, mu::Array{Float64,1},
        Sigma::AbstractMatrix{<:Real} = Matrix{Float64}(I, d, d))
    length(mu) == d ||
        throw(DimensionMismatch("mu has length $(length(mu)), expected $d"))
    size(Sigma) == (d, d) ||
        throw(DimensionMismatch("Sigma has size $(size(Sigma)), expected ($d, $d)"))

    ## Both quantities depend only on Sigma, so compute them once here instead of
    ## on every evaluation of the returned density.
    Sigma_inv = inv(Sigma)
    normalizer = 1 / sqrt((2pi)^d * det(Sigma))

    return function (z)
        r = z - mu
        return normalizer * exp(-dot(r, Sigma_inv * r) / 2)
    end
end

NormalDensity (generic function with 2 methods)

In [51]:
## Kernels = [BiNormalDensity(Data[i,:], H_MS) for i=1:n]
## KDE evaluated at z: the average of the Gaussian kernels (covariance H_MS) centred at
## the columns of X selected by Rand_N. Using Rand_N rather than 1:N makes the estimate
## follow the random subsample drawn earlier; both coincide when N == n.
f_H(z) = sum(NormalDensity(d, X[:, i], H_MS)(z) for i in Rand_N) / length(Rand_N)

f_H (generic function with 1 method)

In [52]:
## NOTE(review): f_H is defined above with a single argument z; this zero-argument call
## does not match that definition, so the output recorded below is presumably stale - confirm.
f_H()

3

In [45]:
## Centre every column of R on its own mean, working on a copy so R is left untouched
Rc = copy(R)
for j in axes(R, 2)
    Rc[:, j] .-= mean(Rc[:, j])
end
## Cross-product of the centred columns divided by the number of rows of R
## (normalised by the row count itself, not by row count minus one)
(Rc' * Rc) / size(R, 1)

3×3 Array{Float64,2}:
  0.0759505   -0.00872411  0.00576844
 -0.00872411   0.0872637   0.00414364
  0.00576844   0.00414364  0.0803111 

In [12]:
## 3x3 Float64 identity matrix built from the UniformScaling `I` (provided by LinearAlgebra)
Matrix{Float64}(I,3,3)

3×3 Array{Float64,2}:
 1.0  0.0  0.0
 0.0  1.0  0.0
 0.0  0.0  1.0

In [41]:
using StatsBase # "sample"
using Statistics # "cov"
using SpecialFunctions # "gamma"
using LinearAlgebra #"eigen"

"""
**f_KDE** is a function computing the KDE of a given point cloud
**Inputs:**
1. a point cloud X in dimension d of size n, matrix dimension = d x n
2. N::Int, the size of subsample for computing KDE
3. H=H_MS::Array{Float64,2}, the bandwidth parameter

**Output:**
a function: R^d→R

**Details:** uses a Gaussian kernel and, by default, the maximal-smoothing bandwidth H_MS
"""
function f_KDE(X::Array{Float64,2}, N::Int,
        H::AbstractMatrix{<:Real} = zeros(size(X,1), size(X,1)))
    ## X is d x n: one column per sample point
    d, n = size(X)

    ## --- input validation ---
    ## Throws ErrorException when N > n or H is not positive definite (messages kept as-is),
    ## ArgumentError when N < 1, DimensionMismatch when H is not d x d.
    if N > n
        error("The subsample size is greater than the original sample size!!!")
    end
    N >= 1 || throw(ArgumentError("The subsample size N must be at least 1, got $N"))
    size(H) == (d, d) ||
        throw(DimensionMismatch("H must be a $d x $d matrix, got size $(size(H))"))

    ## Columns the estimate is built from: N columns drawn without replacement when
    ## N < n, otherwise all of them. The kernels below are centred on exactly these
    ## columns, so the random subsample is what actually enters the estimate.
    picked = N < n ? sample(1:n, N, replace = false) : collect(1:n)
    centers = X[:, picked]

    ## An all-zero H (the default) is the sentinel for "use the default bandwidth".
    if iszero(H)
        R_K = 1 / (2 * sqrt(pi))^d ## R(K): integral of the squared "normal" kernel
        S = cov(X, dims = 2)       ## sample covariance of the full point cloud
        ## Maximal-smoothness bandwidth H_MS.
        ## NOTE(review): the rate uses the full sample size n even when only N < n
        ## points enter the estimate - confirm this is intended.
        c_MS = (((d + 8)^((d + 6) / 2) * pi^(d / 2) * R_K) /
                (16 * (d + 2) * gamma(d / 2 + 4)))^(2 / (d + 4))
        bandwidth = c_MS * n^(-2 / (d + 4)) * S
    else
        ## Every eigenvalue must be real and strictly positive
        if !all(ev -> isreal(ev) && real(ev) > 0, eigvals(H))
            error("The bandwidth H is NOT positive definit!!!")
        end
        bandwidth = H
    end

    ## These depend only on the bandwidth, so compute them once rather than once per
    ## kernel per evaluation. The 1/N of the average is folded into the constant.
    bw_inv = inv(bandwidth)
    weight = 1 / (N * sqrt((2pi)^d * det(bandwidth)))

    ## The returned estimate: average of the Gaussian kernels centred at the chosen points
    function f_H(z)
        total = 0.0
        for j in 1:N
            r = z - view(centers, :, j)
            total += exp(-dot(r, bw_inv * r) / 2)
        end
        return weight * total
    end
    return f_H
end

f_KDE

In [36]:
## Build a KDE from 1000 random points in dimension 2 (entries drawn by rand, i.e. in [0,1))
## with N = 300; no H is passed, so the default bandwidth is used
F = f_KDE(rand(2,1000),300)

(::getfield(Main, Symbol("#f_H#21")){Array{Float64,2},Int64,Int64}) (generic function with 1 method)

In [40]:
## Evaluate the estimate at the point (1, 5), whose second coordinate lies far outside
## the [0,1) range the sample was drawn from
F([1,5])

0.0